From f9fd3119fabf2c71a3c7f0a9c2ab803421709d89 Mon Sep 17 00:00:00 2001 From: phil Date: Thu, 28 Oct 2004 22:17:27 +0000 Subject: [PATCH] merge b1_2 updates into b1_4: - add mkfsoptions to LDAP (4679) - use ->max_readahead method instead of zapping global ra (5039) - don't interrupt __l_wait_event() during strace * miscellania - add software watchdogs to catch hung threads quickly (4941) - make lustrefs init script start after nfs is mounted --- .../patches/ext3-mballoc2-2.6-suse.patch | 6 +- lustre/ChangeLog | 6 + lustre/conf/lustre.dtd | 5 +- lustre/conf/lustre2ldif.xsl | 6 + lustre/include/linux/lustre_compat25.h | 2 +- lustre/include/linux/lustre_lib.h | 30 +- lustre/include/linux/lustre_net.h | 6 +- lustre/include/linux/obd_support.h | 1 + .../patches/export-show_task-2.4-rh.patch | 171 + .../patches/export-show_task-2.4-rhel.patch | 198 + .../patches/export-show_task-2.4-vanilla.patch | 34 + .../patches/export-show_task-2.6-vanilla.patch | 21 + .../patches/ext3-extents-2.6.7.patch | 2844 --- .../patches/ext3-mballoc2-2.6-suse.patch | 6 +- .../kernel_patches/patches/iopen-2.6-vanilla.patch | 476 - .../patches/kernel_text_address-2.4.18-chaos.patch | 40 - .../patches/kernel_text_address-2.4.20-rh.patch | 68 - .../kernel_text_address-2.4.20-vanilla.patch | 116 - .../kernel_text_address-2.4.22-vanilla.patch | 59 - .../patches/lookup_bdev_init_intent.patch | 11 - lustre/kernel_patches/patches/lustre_version.patch | 3 +- .../patches/nfs-cifs-intent-2.6-vanilla.patch | 117 - .../kernel_patches/patches/uml-2.6.7-01-bb2.patch | 20388 ------------------- .../patches/vfs_intent-2.6-vanilla.patch | 799 - .../patches/vfs_nointent-2.6-vanilla.patch | 485 - .../patches/vfs_races-2.6-vanilla.patch | 65 - lustre/kernel_patches/series/2.6-suse.series | 1 + lustre/kernel_patches/series/2.6-vanilla.series | 15 - lustre/kernel_patches/series/chaos-2.4.21 | 1 - lustre/kernel_patches/series/hp-pnnl-2.4.20 | 1 - .../series/ldiskfs-2.6-vanilla.series | 11 - 
lustre/kernel_patches/series/rh-2.4.20 | 2 +- lustre/kernel_patches/series/rh-2.4.22 | 1 - lustre/kernel_patches/series/rhel-2.4.21 | 2 +- lustre/kernel_patches/series/suse-2.4.21-171 | 1 - lustre/kernel_patches/series/suse-2.4.21-2 | 1 - lustre/kernel_patches/series/vanilla-2.4.24 | 3 +- lustre/ldlm/l_lock.c | 4 +- lustre/ldlm/ldlm_flock.c | 47 +- lustre/ldlm/ldlm_lockd.c | 4 +- lustre/ldlm/ldlm_request.c | 37 +- lustre/llite/file.c | 22 +- lustre/llite/llite_internal.h | 13 +- lustre/llite/llite_lib.c | 65 +- lustre/llite/lproc_llite.c | 24 +- lustre/llite/rw.c | 18 +- lustre/llite/rw24.c | 14 +- lustre/llite/rw26.c | 2 +- lustre/mds/handler.c | 7 + lustre/mds/mds_internal.h | 2 + lustre/mgmt/mgmt_svc.c | 7 +- lustre/obdclass/class_obd.c | 2 +- lustre/ost/ost_handler.c | 5 +- lustre/portals/archdep.m4 | 35 + lustre/portals/include/linux/kp30.h | 6 +- lustre/portals/include/linux/libcfs.h | 30 +- lustre/portals/include/linux/portals_compat25.h | 12 +- lustre/portals/libcfs/Makefile.in | 2 +- lustre/portals/libcfs/debug.c | 108 +- lustre/portals/libcfs/watchdog.c | 388 + lustre/ptlbd/server.c | 2 +- lustre/ptlrpc/service.c | 16 +- lustre/scripts/lustrefs | 2 +- lustre/utils/lfs.c | 47 +- 64 files changed, 1215 insertions(+), 25708 deletions(-) create mode 100644 lustre/kernel_patches/patches/export-show_task-2.4-rh.patch create mode 100644 lustre/kernel_patches/patches/export-show_task-2.4-rhel.patch create mode 100644 lustre/kernel_patches/patches/export-show_task-2.4-vanilla.patch create mode 100644 lustre/kernel_patches/patches/export-show_task-2.6-vanilla.patch delete mode 100644 lustre/kernel_patches/patches/ext3-extents-2.6.7.patch delete mode 100644 lustre/kernel_patches/patches/iopen-2.6-vanilla.patch delete mode 100644 lustre/kernel_patches/patches/kernel_text_address-2.4.18-chaos.patch delete mode 100644 lustre/kernel_patches/patches/kernel_text_address-2.4.20-rh.patch delete mode 100644 
lustre/kernel_patches/patches/kernel_text_address-2.4.20-vanilla.patch delete mode 100644 lustre/kernel_patches/patches/kernel_text_address-2.4.22-vanilla.patch delete mode 100644 lustre/kernel_patches/patches/lookup_bdev_init_intent.patch delete mode 100644 lustre/kernel_patches/patches/nfs-cifs-intent-2.6-vanilla.patch delete mode 100644 lustre/kernel_patches/patches/uml-2.6.7-01-bb2.patch delete mode 100644 lustre/kernel_patches/patches/vfs_intent-2.6-vanilla.patch delete mode 100644 lustre/kernel_patches/patches/vfs_nointent-2.6-vanilla.patch delete mode 100644 lustre/kernel_patches/patches/vfs_races-2.6-vanilla.patch delete mode 100644 lustre/kernel_patches/series/2.6-vanilla.series delete mode 100644 lustre/kernel_patches/series/ldiskfs-2.6-vanilla.series create mode 100644 lustre/portals/libcfs/watchdog.c diff --git a/ldiskfs/kernel_patches/patches/ext3-mballoc2-2.6-suse.patch b/ldiskfs/kernel_patches/patches/ext3-mballoc2-2.6-suse.patch index 6517722..7c3d8bd 100644 --- a/ldiskfs/kernel_patches/patches/ext3-mballoc2-2.6-suse.patch +++ b/ldiskfs/kernel_patches/patches/ext3-mballoc2-2.6-suse.patch @@ -1650,9 +1650,9 @@ Index: linux-stage/include/linux/ext3_fs.h */ @@ -336,6 +338,7 @@ #define EXT3_MOUNT_IOPEN_NOPRIV 0x80000 /* Make iopen world-readable */ - #define EXT3_MOUNT_EXTENTS 0x10000 /* Extents support */ - #define EXT3_MOUNT_EXTDEBUG 0x20000 /* Extents debug */ -+#define EXT3_MOUNT_MBALLOC 0x100000/* Buddy allocation support */ + #define EXT3_MOUNT_EXTENTS 0x100000/* Extents support */ + #define EXT3_MOUNT_EXTDEBUG 0x200000/* Extents debug */ ++#define EXT3_MOUNT_MBALLOC 0x400000/* Buddy allocation support */ /* Compatibility, for having both ext2_fs.h and ext3_fs.h included at once */ #ifndef clear_opt diff --git a/lustre/ChangeLog b/lustre/ChangeLog index 7aed4ee..a59b027e 100644 --- a/lustre/ChangeLog +++ b/lustre/ChangeLog @@ -20,6 +20,12 @@ tbd Cluster File Systems, Inc. 
- use transno after validating reply (3892) - process timed out requests if import state changes (3754) - update mtime on OST during writes, return in glimpse (4829) + - add mkfsoptions to LDAP (4679) + - use ->max_readahead method instead of zapping global ra (5039) + - don't interrupt __l_wait_event() during strace + * miscellania + - add software watchdogs to catch hung threads quickly (4941) + - make lustrefs init script start after nfs is mounted 2004-10-07 Cluster File Systems, Inc. * version 1.2.7 diff --git a/lustre/conf/lustre.dtd b/lustre/conf/lustre.dtd index 8fd57d8..0e8ad33 100644 --- a/lustre/conf/lustre.dtd +++ b/lustre/conf/lustre.dtd @@ -53,7 +53,7 @@ + target_ref | node_ref | journalsize | mkfsoptions)*> @@ -71,7 +71,7 @@ failover ( 1 | 0 ) #IMPLIED> + target_ref | node_ref | journalsize | mkfsoptions)*> @@ -90,6 +90,7 @@ + diff --git a/lustre/conf/lustre2ldif.xsl b/lustre/conf/lustre2ldif.xsl index 3713ec8..8c3c24a 100644 --- a/lustre/conf/lustre2ldif.xsl +++ b/lustre/conf/lustre2ldif.xsl @@ -122,6 +122,9 @@ devsize: journalsize: + +mkfsoptions: + nodeRef: targetRef: @@ -173,6 +176,9 @@ devsize: journalsize: + +mkfsoptions: + diff --git a/lustre/include/linux/lustre_compat25.h b/lustre/include/linux/lustre_compat25.h index 95c462f..44e1a57 100644 --- a/lustre/include/linux/lustre_compat25.h +++ b/lustre/include/linux/lustre_compat25.h @@ -113,7 +113,7 @@ static inline int cleanup_group_info(void) page->private = 0; \ } while(0) -#define smp_num_cpus NR_CPUS +#define smp_num_cpus num_online_cpus() #include diff --git a/lustre/include/linux/lustre_lib.h b/lustre/include/linux/lustre_lib.h index bcb4bbe..e518b93 100644 --- a/lustre/include/linux/lustre_lib.h +++ b/lustre/include/linux/lustre_lib.h @@ -594,6 +594,7 @@ do { \ int __timed_out = 0; \ unsigned long irqflags; \ sigset_t blocked; \ + signed long timeout_remaining; \ \ init_waitqueue_entry(&__wait, current); \ if (excl) \ @@ -607,12 +608,15 @@ do { \ else \ blocked = l_w_e_set_sigs(0); \ \ 
+ timeout_remaining = info->lwi_timeout; \ + \ for (;;) { \ set_current_state(TASK_INTERRUPTIBLE); \ if (condition) \ break; \ if (info->lwi_timeout && !__timed_out) { \ - if (schedule_timeout(info->lwi_timeout) == 0) { \ + timeout_remaining = schedule_timeout(timeout_remaining); \ + if (timeout_remaining == 0) { \ __timed_out = 1; \ if (!info->lwi_on_timeout || \ info->lwi_on_timeout(info->lwi_cb_data)) { \ @@ -629,10 +633,20 @@ do { \ if (condition) \ break; \ if (signal_pending(current)) { \ - if (info->lwi_on_signal) \ - info->lwi_on_signal(info->lwi_cb_data); \ - ret = -EINTR; \ - break; \ + if (__timed_out) { \ + break; \ + } else { \ + /* We have to do this here because some signals */ \ + /* are not blockable - ie from strace(1). */ \ + /* In these cases we want to schedule_timeout() */ \ + /* again, because we don't want that to return */ \ + /* -EINTR when the RPC actually succeeded. */ \ + /* the RECALC_SIGPENDING below will deliver the */ \ + /* signal properly. */ \ + SIGNAL_MASK_LOCK(current, irqflags); \ + CLEAR_SIGPENDING; \ + SIGNAL_MASK_UNLOCK(current, irqflags); \ + } \ } \ } \ \ @@ -641,6 +655,12 @@ do { \ RECALC_SIGPENDING; \ SIGNAL_MASK_UNLOCK(current, irqflags); \ \ + if (__timed_out && signal_pending(current)) { \ + if (info->lwi_on_signal) \ + info->lwi_on_signal(info->lwi_cb_data); \ + ret = -EINTR; \ + } \ + \ current->state = TASK_RUNNING; \ remove_wait_queue(&wq, &__wait); \ } while(0) diff --git a/lustre/include/linux/lustre_net.h b/lustre/include/linux/lustre_net.h index 87064fb..30a2780 100644 --- a/lustre/include/linux/lustre_net.h +++ b/lustre/include/linux/lustre_net.h @@ -477,7 +477,8 @@ struct ptlrpc_service { int srv_n_difficult_replies; /* # 'difficult' replies */ int srv_n_active_reqs; /* # reqs being served */ int srv_rqbd_timeout; /* timeout before re-posting reqs */ - + int srv_watchdog_timeout; /* soft watchdog timeout, in ms */ + __u32 srv_req_portal; __u32 srv_rep_portal; @@ -644,7 +645,8 @@ void ptlrpc_save_lock 
(struct ptlrpc_request *req, void ptlrpc_commit_replies (struct obd_device *obd); void ptlrpc_schedule_difficult_reply (struct ptlrpc_reply_state *rs); struct ptlrpc_service *ptlrpc_init_svc(int nbufs, int bufsize, int max_req_size, - int req_portal, int rep_portal, + int req_portal, int rep_portal, + int watchdog_timeout, /* in ms */ svc_handler_t, char *name, struct proc_dir_entry *proc_entry); void ptlrpc_stop_all_threads(struct ptlrpc_service *svc); diff --git a/lustre/include/linux/obd_support.h b/lustre/include/linux/obd_support.h index 01f8b33..9a014ec 100644 --- a/lustre/include/linux/obd_support.h +++ b/lustre/include/linux/obd_support.h @@ -84,6 +84,7 @@ extern wait_queue_head_t obd_race_waitq; #define OBD_FAIL_MDS_DONE_WRITING_NET 0x126 #define OBD_FAIL_MDS_DONE_WRITING_PACK 0x127 #define OBD_FAIL_MDS_ALLOC_OBDO 0x128 +#define OBD_FAIL_MDS_STATFS_LCW_SLEEP 0x129 #define OBD_FAIL_OST 0x200 #define OBD_FAIL_OST_CONNECT_NET 0x201 diff --git a/lustre/kernel_patches/patches/export-show_task-2.4-rh.patch b/lustre/kernel_patches/patches/export-show_task-2.4-rh.patch new file mode 100644 index 0000000..3799348 --- /dev/null +++ b/lustre/kernel_patches/patches/export-show_task-2.4-rh.patch @@ -0,0 +1,171 @@ +Index: linux-2.4.20/kernel/ksyms.c +=================================================================== +--- linux-2.4.20.orig/kernel/ksyms.c 2004-10-21 21:30:14.000000000 -0400 ++++ linux-2.4.20/kernel/ksyms.c 2004-10-21 21:32:00.000000000 -0400 +@@ -75,6 +75,7 @@ + extern spinlock_t dma_spin_lock; + extern int panic_timeout; + ++extern void show_task(task_t *); + + #ifdef CONFIG_MODVERSIONS + const struct module_symbol __export_Using_Versions +@@ -636,3 +637,4 @@ + extern void check_tasklist_locked(void); + EXPORT_SYMBOL_GPL(check_tasklist_locked); + EXPORT_SYMBOL(dump_stack); ++EXPORT_SYMBOL(show_task); +Index: linux-2.4.20/arch/i386/kernel/traps.c +=================================================================== +--- 
linux-2.4.20.orig/arch/i386/kernel/traps.c 2004-10-21 21:30:15.000000000 -0400 ++++ linux-2.4.20/arch/i386/kernel/traps.c 2004-10-25 14:34:41.000000000 -0400 +@@ -137,29 +137,141 @@ + + #endif + +-void show_trace(unsigned long * stack) ++void scan_stack (unsigned long *stack) + { + int i; + unsigned long addr; +- /* static to not take up stackspace; if we race here too bad */ +- static char buffer[512]; ++ /* static to not take up stackspace */ ++ static char buffer[NR_CPUS][512], *bufp; + +- if (!stack) +- stack = (unsigned long*)&stack; ++ bufp = buffer[smp_processor_id()]; + +- printk("Call Trace: "); ++ /* ++ * If we have frame pointers then use them to get ++ * a 100% exact backtrace, up until the entry frame: ++ */ + i = 1; + while (((long) stack & (THREAD_SIZE-1)) != 0) { + addr = *stack++; + if (kernel_text_address(addr)) { +- lookup_symbol(addr, buffer, 512); +- printk("[<%08lx>] %s (0x%x))\n", addr,buffer,stack-1); ++ lookup_symbol(addr, bufp, 512); ++ printk("[<%08lx>] %s (0x%p)\n", addr,bufp,stack-1); + i++; + } + } ++} ++ ++#if CONFIG_FRAME_POINTER ++void show_stack_frame_params (int param_count, unsigned long params[]) ++{ ++ int i; ++ unsigned long *p, task_addr, stack_base; ++ ++ if (param_count <= 0) ++ return; ++ ++ task_addr = (unsigned long) current; ++ stack_base = task_addr + THREAD_SIZE - 1; ++ ++ printk(" ("); ++ ++ for (i = 0, p = params; ++ ((param_count - i) > 1) && (p >= task_addr) && (p <= stack_base); ++ i++, p++) { ++ printk("0x%x, ", *p); ++ ++ if ((i % 4) == 3) ++ printk("\n "); ++ } ++ ++ if ((p >= task_addr) && (p <= stack_base)) ++ printk("0x%x)\n", *p); ++} ++ ++/* Display a stack trace for the currently executing task. The 'dummy' ++ * parameter serves a purpose although its value is unused. We use the ++ * address of 'dummy' as a reference point for finding the saved %ebp register ++ * value on the stack. 
++ */ ++void frame_pointer_walk (void *dummy) ++{ ++ int i; ++ unsigned long addr, task_addr, *frame_ptr, *next_frame_ptr, *eip_ptr, ++ eip, stack_base; ++ /* static to not take up stackspace */ ++ static char buffer[NR_CPUS][512], *bufp; ++ ++ bufp = buffer[smp_processor_id()]; ++ task_addr = (unsigned long) current; ++ stack_base = task_addr + THREAD_SIZE - 1; ++ frame_ptr = (unsigned long *) (&dummy - 2); ++ ++ for (; ; ) { ++ next_frame_ptr = (unsigned long *) (*frame_ptr); ++ addr = (unsigned long) next_frame_ptr; ++ ++ /* Stop when we reach a frame pointer that points to a ++ * location clearly outside our own kernel stack. ++ */ ++ if ((addr < task_addr) || (addr > stack_base)) ++ break; ++ ++ eip_ptr = frame_ptr + 1; ++ eip = *eip_ptr; ++ ++ if (kernel_text_address(eip)) { ++ lookup_symbol(eip, bufp, 512); ++ show_stack_frame_params(4, frame_ptr + 2); ++ printk("[<%08lx>] %s (0x%x)\n", eip, bufp, ++ eip_ptr); ++ } ++ ++ frame_ptr = next_frame_ptr; ++ } ++} ++ ++typedef void (*stack_trace_fn_t) (unsigned long *stack); ++ ++void show_trace(unsigned long * stack) ++{ ++ static const stack_trace_fn_t trace_fn_vector[] = ++ { scan_stack, frame_pointer_walk }; ++ unsigned long addr, task_addr, stack_base; ++ int task_is_current; ++ ++ if (!stack) ++ stack = (unsigned long*)&stack; ++ ++ printk("Call Trace:\n"); ++ addr = (unsigned long) stack; ++ task_addr = (unsigned long) current; ++ stack_base = task_addr + THREAD_SIZE - 1; ++ task_is_current = (addr >= task_addr) && (addr <= stack_base); ++ ++ /* We may use frame pointers to do a stack trace only if the current ++ * task is being traced. Tracing some other task in this manner ++ * would require a saved %ebp register value. Perhaps in the future ++ * I'll consider providing a means of obtaining this. 
++ */ ++ trace_fn_vector[task_is_current](stack); ++ + printk("\n"); + } + ++#else /* CONFIG_FRAME_POINTER */ ++ ++void show_trace(unsigned long * stack) ++{ ++ if (!stack) ++ stack = (unsigned long*)&stack; ++ ++ printk("Call Trace:\n"); ++ scan_stack(stack); ++ printk("\n"); ++} ++ ++#endif /* CONFIG_FRAME_POINTER */ ++ + void show_trace_task(struct task_struct *tsk) + { + unsigned long esp = tsk->thread.esp; diff --git a/lustre/kernel_patches/patches/export-show_task-2.4-rhel.patch b/lustre/kernel_patches/patches/export-show_task-2.4-rhel.patch new file mode 100644 index 0000000..e71b775 --- /dev/null +++ b/lustre/kernel_patches/patches/export-show_task-2.4-rhel.patch @@ -0,0 +1,198 @@ +Index: linux-2.4.21/kernel/ksyms.c +=================================================================== +--- linux-2.4.21.orig/kernel/ksyms.c 2004-10-20 18:48:05.000000000 -0400 ++++ linux-2.4.21/kernel/ksyms.c 2004-10-21 12:46:05.000000000 -0400 +@@ -86,6 +86,7 @@ + }; + #endif + ++extern void show_task(task_t *); + + EXPORT_SYMBOL(inter_module_register); + EXPORT_SYMBOL(inter_module_unregister); +@@ -670,6 +671,7 @@ + extern void check_tasklist_locked(void); + EXPORT_SYMBOL_GPL(check_tasklist_locked); + EXPORT_SYMBOL(dump_stack); ++EXPORT_SYMBOL(show_task); + + EXPORT_SYMBOL_GPL(netdump_func); + EXPORT_SYMBOL_GPL(netdump_mode); +Index: linux-2.4.21/arch/i386/kernel/traps.c +=================================================================== +--- linux-2.4.21.orig/arch/i386/kernel/traps.c 2004-10-20 18:48:05.000000000 -0400 ++++ linux-2.4.21/arch/i386/kernel/traps.c 2004-10-21 12:46:50.000000000 -0400 +@@ -133,48 +133,142 @@ + + #endif + +-void show_trace(unsigned long * stack) ++void scan_stack (unsigned long *stack) + { +-#if !CONFIG_FRAME_POINTER + int i; +-#endif + unsigned long addr; +- /* static to not take up stackspace; if we race here too bad */ +- static char buffer[512]; ++ /* static to not take up stackspace */ ++ static char buffer[NR_CPUS][512], *bufp; + +- if 
(!stack) +- stack = (unsigned long*)&stack; ++ bufp = buffer[smp_processor_id()]; + +- printk("Call Trace: "); + /* + * If we have frame pointers then use them to get + * a 100% exact backtrace, up until the entry frame: + */ +-#if CONFIG_FRAME_POINTER +-#define DO(n) \ +- addr = (int)__builtin_return_address(n); \ +- if (!kernel_text_address(addr)) \ +- goto out; \ +- lookup_symbol(addr, buffer, 512); \ +- printk("[<%08lx>] %s\n", addr, buffer); +- +- DO(0); DO(1); DO(2); DO(3); DO(4); DO(5); DO(7); DO(8); DO(9); +- DO(10); DO(11); DO(12); DO(13); DO(14); DO(15); DO(17); DO(18); DO(19); +-out: +-#else + i = 1; + while (((long) stack & (THREAD_SIZE-1)) != 0) { + addr = *stack++; + if (kernel_text_address(addr)) { +- lookup_symbol(addr, buffer, 512); +- printk("[<%08lx>] %s (0x%p)\n", addr,buffer,stack-1); ++ lookup_symbol(addr, bufp, 512); ++ printk("[<%08lx>] %s (0x%p)\n", addr,bufp,stack-1); + i++; + } + } +-#endif ++} ++ ++#if CONFIG_FRAME_POINTER ++void show_stack_frame_params (int param_count, unsigned long params[]) ++{ ++ int i; ++ unsigned long *p, task_addr, stack_base; ++ ++ if (param_count <= 0) ++ return; ++ ++ task_addr = (unsigned long) current; ++ stack_base = task_addr + THREAD_SIZE - 1; ++ ++ printk(" ("); ++ ++ for (i = 0, p = params; ++ ((param_count - i) > 1) && (p >= task_addr) && (p <= stack_base); ++ i++, p++) { ++ printk("0x%x, ", *p); ++ ++ if ((i % 4) == 3) ++ printk("\n "); ++ } ++ ++ if ((p >= task_addr) && (p <= stack_base)) ++ printk("0x%x)\n", *p); ++} ++ ++/* Display a stack trace for the currently executing task. The 'dummy' ++ * parameter serves a purpose although its value is unused. We use the ++ * address of 'dummy' as a reference point for finding the saved %ebp register ++ * value on the stack. 
++ */ ++void frame_pointer_walk (void *dummy) ++{ ++ int i; ++ unsigned long addr, task_addr, *frame_ptr, *next_frame_ptr, *eip_ptr, ++ eip, stack_base; ++ /* static to not take up stackspace */ ++ static char buffer[NR_CPUS][512], *bufp; ++ ++ bufp = buffer[smp_processor_id()]; ++ task_addr = (unsigned long) current; ++ stack_base = task_addr + THREAD_SIZE - 1; ++ frame_ptr = (unsigned long *) (&dummy - 2); ++ ++ for (; ; ) { ++ next_frame_ptr = (unsigned long *) (*frame_ptr); ++ addr = (unsigned long) next_frame_ptr; ++ ++ /* Stop when we reach a frame pointer that points to a ++ * location clearly outside our own kernel stack. ++ */ ++ if ((addr < task_addr) || (addr > stack_base)) ++ break; ++ ++ eip_ptr = frame_ptr + 1; ++ eip = *eip_ptr; ++ ++ if (kernel_text_address(eip)) { ++ lookup_symbol(eip, bufp, 512); ++ show_stack_frame_params(4, frame_ptr + 2); ++ printk("[<%08lx>] %s (0x%x)\n", eip, bufp, ++ eip_ptr); ++ } ++ ++ frame_ptr = next_frame_ptr; ++ } ++} ++ ++typedef void (*stack_trace_fn_t) (unsigned long *stack); ++ ++void show_trace(unsigned long * stack) ++{ ++ static const stack_trace_fn_t trace_fn_vector[] = ++ { scan_stack, frame_pointer_walk }; ++ unsigned long addr, task_addr, stack_base; ++ int task_is_current; ++ ++ if (!stack) ++ stack = (unsigned long*)&stack; ++ ++ printk("Call Trace:\n"); ++ addr = (unsigned long) stack; ++ task_addr = (unsigned long) current; ++ stack_base = task_addr + THREAD_SIZE - 1; ++ task_is_current = (addr >= task_addr) && (addr <= stack_base); ++ ++ /* We may use frame pointers to do a stack trace only if the current ++ * task is being traced. Tracing some other task in this manner ++ * would require a saved %ebp register value. Perhaps in the future ++ * I'll consider providing a means of obtaining this. 
++ */ ++ trace_fn_vector[task_is_current](stack); ++ ++ printk("\n"); ++} ++ ++#else /* CONFIG_FRAME_POINTER */ ++ ++void show_trace(unsigned long * stack) ++{ ++ if (!stack) ++ stack = (unsigned long*)&stack; ++ ++ printk("Call Trace:\n"); ++ scan_stack(stack); + printk("\n"); + } + ++#endif /* CONFIG_FRAME_POINTER */ ++ ++ + void show_trace_task(struct task_struct *tsk) + { + unsigned long esp = tsk->thread.esp; +@@ -182,6 +276,7 @@ + /* User space on another CPU? */ + if ((esp ^ (unsigned long)tsk) & ~(THREAD_SIZE - 1)) + return; ++ printk("skipping trace...\n"); + show_trace((unsigned long *)esp); + } + diff --git a/lustre/kernel_patches/patches/export-show_task-2.4-vanilla.patch b/lustre/kernel_patches/patches/export-show_task-2.4-vanilla.patch new file mode 100644 index 0000000..4e05709 --- /dev/null +++ b/lustre/kernel_patches/patches/export-show_task-2.4-vanilla.patch @@ -0,0 +1,34 @@ +Index: linux-2.4.24/kernel/ksyms.c +=================================================================== +--- linux-2.4.24.orig/kernel/ksyms.c 2004-10-22 14:00:00.000000000 -0400 ++++ linux-2.4.24/kernel/ksyms.c 2004-10-22 14:03:54.000000000 -0400 +@@ -70,6 +70,8 @@ + extern spinlock_t dma_spin_lock; + extern int panic_timeout; + ++extern void show_task(struct task_struct *); ++ + #ifdef CONFIG_MODVERSIONS + const struct module_symbol __export_Using_Versions + __attribute__((section("__ksymtab"))) = { +@@ -619,6 +621,7 @@ + + /* debug */ + EXPORT_SYMBOL(dump_stack); ++EXPORT_SYMBOL(show_task); + + /* To match ksyms with System.map */ + extern const char _end[]; +Index: linux-2.4.24/kernel/sched.c +=================================================================== +--- linux-2.4.24.orig/kernel/sched.c 2003-11-28 13:26:21.000000000 -0500 ++++ linux-2.4.24/kernel/sched.c 2004-10-22 14:10:10.000000000 -0400 +@@ -1177,7 +1177,7 @@ + return retval; + } + +-static void show_task(struct task_struct * p) ++void show_task(struct task_struct * p) + { + unsigned long free = 0; + int 
state; diff --git a/lustre/kernel_patches/patches/export-show_task-2.6-vanilla.patch b/lustre/kernel_patches/patches/export-show_task-2.6-vanilla.patch new file mode 100644 index 0000000..81f62ff --- /dev/null +++ b/lustre/kernel_patches/patches/export-show_task-2.6-vanilla.patch @@ -0,0 +1,21 @@ +Index: linux-2.6.5-SLES9_SP1_BRANCH_2004102113353091/kernel/sched.c +=================================================================== +--- linux-2.6.5-SLES9_SP1_BRANCH_2004102113353091.orig/kernel/sched.c 2004-10-22 15:25:05.000000000 -0400 ++++ linux-2.6.5-SLES9_SP1_BRANCH_2004102113353091/kernel/sched.c 2004-10-22 15:39:18.000000000 -0400 +@@ -3147,7 +3147,7 @@ + return list_entry(p->sibling.next,struct task_struct,sibling); + } + +-static void show_task(task_t * p) ++void show_task(task_t * p) + { + task_t *relative; + unsigned state; +@@ -3200,6 +3200,7 @@ + if (state != TASK_RUNNING) + show_stack(p, NULL); + } ++EXPORT_SYMBOL(show_task); + + void show_state(void) + { diff --git a/lustre/kernel_patches/patches/ext3-extents-2.6.7.patch b/lustre/kernel_patches/patches/ext3-extents-2.6.7.patch deleted file mode 100644 index 597038e..0000000 --- a/lustre/kernel_patches/patches/ext3-extents-2.6.7.patch +++ /dev/null @@ -1,2844 +0,0 @@ -%patch -Index: linux-2.6.7/fs/ext3/extents.c -=================================================================== ---- linux-2.6.7.orig/fs/ext3/extents.c 2003-01-30 13:24:37.000000000 +0300 -+++ linux-2.6.7/fs/ext3/extents.c 2004-09-12 20:07:35.000000000 +0400 -@@ -0,0 +1,2306 @@ -+/* -+ * Copyright (c) 2003, Cluster File Systems, Inc, info@clusterfs.com -+ * Written by Alex Tomas -+ * -+ * This program is free software; you can redistribute it and/or modify -+ * it under the terms of the GNU General Public License version 2 as -+ * published by the Free Software Foundation. 
-+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU General Public License for more details. -+ * -+ * You should have received a copy of the GNU General Public Licens -+ * along with this program; if not, write to the Free Software -+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111- -+ */ -+ -+/* -+ * Extents support for EXT3 -+ * -+ * TODO: -+ * - ext3_ext_walk_space() sould not use ext3_ext_find_extent() -+ * - ext3_ext_calc_credits() could take 'mergable' into account -+ * - ext3*_error() should be used in some situations -+ * - find_goal() [to be tested and improved] -+ * - smart tree reduction -+ * - arch-independence -+ * common on-disk format for big/little-endian arch -+ */ -+ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+ -+static handle_t *ext3_ext_journal_restart(handle_t *handle, int needed) -+{ -+ int err; -+ -+ if (handle->h_buffer_credits > needed) -+ return handle; -+ if (!ext3_journal_extend(handle, needed)) -+ return handle; -+ err = ext3_journal_restart(handle, needed); -+ -+ return handle; -+} -+ -+static int inline -+ext3_ext_get_access_for_root(handle_t *h, struct ext3_extents_tree *tree) -+{ -+ if (tree->ops->get_write_access) -+ return tree->ops->get_write_access(h,tree->buffer); -+ else -+ return 0; -+} -+ -+static int inline -+ext3_ext_mark_root_dirty(handle_t *h, struct ext3_extents_tree *tree) -+{ -+ if (tree->ops->mark_buffer_dirty) -+ return tree->ops->mark_buffer_dirty(h,tree->buffer); -+ else -+ return 0; -+} -+ -+/* -+ * could return: -+ * - EROFS -+ * - ENOMEM -+ */ -+static int ext3_ext_get_access(handle_t *handle, -+ struct ext3_extents_tree *tree, -+ struct ext3_ext_path *path) -+{ -+ int err; -+ -+ if (path->p_bh) { -+ /* path points to block */ -+ 
err = ext3_journal_get_write_access(handle, path->p_bh); -+ } else { -+ /* path points to leaf/index in inode body */ -+ err = ext3_ext_get_access_for_root(handle, tree); -+ } -+ return err; -+} -+ -+/* -+ * could return: -+ * - EROFS -+ * - ENOMEM -+ * - EIO -+ */ -+static int ext3_ext_dirty(handle_t *handle, struct ext3_extents_tree *tree, -+ struct ext3_ext_path *path) -+{ -+ int err; -+ if (path->p_bh) { -+ /* path points to block */ -+ err =ext3_journal_dirty_metadata(handle, path->p_bh); -+ } else { -+ /* path points to leaf/index in inode body */ -+ err = ext3_ext_mark_root_dirty(handle, tree); -+ } -+ return err; -+} -+ -+static int inline -+ext3_ext_new_block(handle_t *handle, struct ext3_extents_tree *tree, -+ struct ext3_ext_path *path, struct ext3_extent *ex, -+ int *err) -+{ -+ int goal, depth, newblock; -+ struct inode *inode; -+ -+ EXT_ASSERT(tree); -+ if (tree->ops->new_block) -+ return tree->ops->new_block(handle, tree, path, ex, err); -+ -+ inode = tree->inode; -+ depth = EXT_DEPTH(tree); -+ if (path && depth > 0) { -+ goal = path[depth-1].p_block; -+ } else { -+ struct ext3_inode_info *ei = EXT3_I(inode); -+ unsigned long bg_start; -+ unsigned long colour; -+ -+ bg_start = (ei->i_block_group * -+ EXT3_BLOCKS_PER_GROUP(inode->i_sb)) + -+ le32_to_cpu(EXT3_SB(inode->i_sb)->s_es->s_first_data_block); -+ colour = (current->pid % 16) * -+ (EXT3_BLOCKS_PER_GROUP(inode->i_sb) / 16); -+ goal = bg_start + colour; -+ } -+ -+ newblock = ext3_new_block(handle, inode, goal, 0, 0, err); -+ return newblock; -+} -+ -+static inline void ext3_ext_tree_changed(struct ext3_extents_tree *tree) -+{ -+ struct ext3_extent_header *neh; -+ neh = EXT_ROOT_HDR(tree); -+ neh->eh_generation++; -+} -+ -+static inline int ext3_ext_space_block(struct ext3_extents_tree *tree) -+{ -+ int size; -+ -+ size = (tree->inode->i_sb->s_blocksize - -+ sizeof(struct ext3_extent_header)) -+ / sizeof(struct ext3_extent); -+#ifdef AGRESSIVE_TEST -+ size = 6; -+#endif -+ return size; -+} -+ 
-+static inline int ext3_ext_space_block_idx(struct ext3_extents_tree *tree) -+{ -+ int size; -+ -+ size = (tree->inode->i_sb->s_blocksize - -+ sizeof(struct ext3_extent_header)) -+ / sizeof(struct ext3_extent_idx); -+#ifdef AGRESSIVE_TEST -+ size = 5; -+#endif -+ return size; -+} -+ -+static inline int ext3_ext_space_root(struct ext3_extents_tree *tree) -+{ -+ int size; -+ -+ size = (tree->buffer_len - sizeof(struct ext3_extent_header)) -+ / sizeof(struct ext3_extent); -+#ifdef AGRESSIVE_TEST -+ size = 3; -+#endif -+ return size; -+} -+ -+static inline int ext3_ext_space_root_idx(struct ext3_extents_tree *tree) -+{ -+ int size; -+ -+ size = (tree->buffer_len - -+ sizeof(struct ext3_extent_header)) -+ / sizeof(struct ext3_extent_idx); -+#ifdef AGRESSIVE_TEST -+ size = 4; -+#endif -+ return size; -+} -+ -+static void ext3_ext_show_path(struct ext3_extents_tree *tree, -+ struct ext3_ext_path *path) -+{ -+#ifdef EXT_DEBUG -+ int k, l = path->p_depth; -+ -+ ext_debug(tree, "path:"); -+ for (k = 0; k <= l; k++, path++) { -+ if (path->p_idx) { -+ ext_debug(tree, " %d->%d", path->p_idx->ei_block, -+ path->p_idx->ei_leaf); -+ } else if (path->p_ext) { -+ ext_debug(tree, " %d:%d:%d", -+ path->p_ext->ee_block, -+ path->p_ext->ee_len, -+ path->p_ext->ee_start); -+ } else -+ ext_debug(tree, " []"); -+ } -+ ext_debug(tree, "\n"); -+#endif -+} -+ -+static void ext3_ext_show_leaf(struct ext3_extents_tree *tree, -+ struct ext3_ext_path *path) -+{ -+#ifdef EXT_DEBUG -+ int depth = EXT_DEPTH(tree); -+ struct ext3_extent_header *eh; -+ struct ext3_extent *ex; -+ int i; -+ -+ if (!path) -+ return; -+ -+ eh = path[depth].p_hdr; -+ ex = EXT_FIRST_EXTENT(eh); -+ -+ for (i = 0; i < eh->eh_entries; i++, ex++) { -+ ext_debug(tree, "%d:%d:%d ", -+ ex->ee_block, ex->ee_len, ex->ee_start); -+ } -+ ext_debug(tree, "\n"); -+#endif -+} -+ -+static void ext3_ext_drop_refs(struct ext3_ext_path *path) -+{ -+ int depth = path->p_depth; -+ int i; -+ -+ for (i = 0; i <= depth; i++, path++) -+ if 
(path->p_bh) { -+ brelse(path->p_bh); -+ path->p_bh = NULL; -+ } -+} -+ -+/* -+ * binary search for closest index by given block -+ */ -+static inline void -+ext3_ext_binsearch_idx(struct ext3_extents_tree *tree, -+ struct ext3_ext_path *path, int block) -+{ -+ struct ext3_extent_header *eh = path->p_hdr; -+ struct ext3_extent_idx *ix; -+ int l = 0, k, r; -+ -+ EXT_ASSERT(eh->eh_magic == EXT3_EXT_MAGIC); -+ EXT_ASSERT(eh->eh_entries <= eh->eh_max); -+ EXT_ASSERT(eh->eh_entries > 0); -+ -+ ext_debug(tree, "binsearch for %d(idx): ", block); -+ -+ path->p_idx = ix = EXT_FIRST_INDEX(eh); -+ -+ r = k = eh->eh_entries; -+ while (k > 1) { -+ k = (r - l) / 2; -+ if (block < ix[l + k].ei_block) -+ r -= k; -+ else -+ l += k; -+ ext_debug(tree, "%d:%d:%d ", k, l, r); -+ } -+ -+ ix += l; -+ path->p_idx = ix; -+ ext_debug(tree, " -> %d->%d ", path->p_idx->ei_block, path->p_idx->ei_leaf); -+ -+ while (l++ < r) { -+ if (block < ix->ei_block) -+ break; -+ path->p_idx = ix++; -+ } -+ ext_debug(tree, " -> %d->%d\n", path->p_idx->ei_block, -+ path->p_idx->ei_leaf); -+ -+#ifdef CHECK_BINSEARCH -+ { -+ struct ext3_extent_idx *chix; -+ -+ chix = ix = EXT_FIRST_INDEX(eh); -+ for (k = 0; k < eh->eh_entries; k++, ix++) { -+ if (k != 0 && ix->ei_block <= ix[-1].ei_block) { -+ printk("k=%d, ix=0x%p, first=0x%p\n", k, -+ ix, EXT_FIRST_INDEX(eh)); -+ printk("%u <= %u\n", -+ ix->ei_block,ix[-1].ei_block); -+ } -+ EXT_ASSERT(k == 0 || ix->ei_block > ix[-1].ei_block); -+ if (block < ix->ei_block) -+ break; -+ chix = ix; -+ } -+ EXT_ASSERT(chix == path->p_idx); -+ } -+#endif -+ -+} -+ -+/* -+ * binary search for closest extent by given block -+ */ -+static inline void -+ext3_ext_binsearch(struct ext3_extents_tree *tree, -+ struct ext3_ext_path *path, int block) -+{ -+ struct ext3_extent_header *eh = path->p_hdr; -+ struct ext3_extent *ex; -+ int l = 0, k, r; -+ -+ EXT_ASSERT(eh->eh_magic == EXT3_EXT_MAGIC); -+ EXT_ASSERT(eh->eh_entries <= eh->eh_max); -+ -+ if (eh->eh_entries == 0) { -+ /* -+ * 
this leaf is empty yet: -+ * we get such a leaf in split/add case -+ */ -+ return; -+ } -+ -+ ext_debug(tree, "binsearch for %d: ", block); -+ -+ path->p_ext = ex = EXT_FIRST_EXTENT(eh); -+ -+ r = k = eh->eh_entries; -+ while (k > 1) { -+ k = (r - l) / 2; -+ if (block < ex[l + k].ee_block) -+ r -= k; -+ else -+ l += k; -+ ext_debug(tree, "%d:%d:%d ", k, l, r); -+ } -+ -+ ex += l; -+ path->p_ext = ex; -+ ext_debug(tree, " -> %d:%d:%d ", path->p_ext->ee_block, -+ path->p_ext->ee_start, path->p_ext->ee_len); -+ -+ while (l++ < r) { -+ if (block < ex->ee_block) -+ break; -+ path->p_ext = ex++; -+ } -+ ext_debug(tree, " -> %d:%d:%d\n", path->p_ext->ee_block, -+ path->p_ext->ee_start, path->p_ext->ee_len); -+ -+#ifdef CHECK_BINSEARCH -+ { -+ struct ext3_extent *chex; -+ -+ chex = ex = EXT_FIRST_EXTENT(eh); -+ for (k = 0; k < eh->eh_entries; k++, ex++) { -+ EXT_ASSERT(k == 0 || ex->ee_block > ex[-1].ee_block); -+ if (block < ex->ee_block) -+ break; -+ chex = ex; -+ } -+ EXT_ASSERT(chex == path->p_ext); -+ } -+#endif -+ -+} -+ -+int ext3_extent_tree_init(handle_t *handle, struct ext3_extents_tree *tree) -+{ -+ struct ext3_extent_header *eh; -+ -+ BUG_ON(tree->buffer_len == 0); -+ ext3_ext_get_access_for_root(handle, tree); -+ eh = EXT_ROOT_HDR(tree); -+ eh->eh_depth = 0; -+ eh->eh_entries = 0; -+ eh->eh_magic = EXT3_EXT_MAGIC; -+ eh->eh_max = ext3_ext_space_root(tree); -+ ext3_ext_mark_root_dirty(handle, tree); -+ ext3_ext_invalidate_cache(tree); -+ return 0; -+} -+ -+struct ext3_ext_path * -+ext3_ext_find_extent(struct ext3_extents_tree *tree, int block, -+ struct ext3_ext_path *path) -+{ -+ struct ext3_extent_header *eh; -+ struct buffer_head *bh; -+ int depth, i, ppos = 0; -+ -+ EXT_ASSERT(tree); -+ EXT_ASSERT(tree->inode); -+ EXT_ASSERT(tree->root); -+ -+ eh = EXT_ROOT_HDR(tree); -+ EXT_ASSERT(eh); -+ i = depth = EXT_DEPTH(tree); -+ EXT_ASSERT(eh->eh_max); -+ EXT_ASSERT(eh->eh_magic == EXT3_EXT_MAGIC); -+ EXT_ASSERT(i == 0 || eh->eh_entries > 0); -+ -+ /* account 
possible depth increase */ -+ if (!path) { -+ path = kmalloc(sizeof(struct ext3_ext_path) * (depth + 2), -+ GFP_NOFS); -+ if (!path) -+ return ERR_PTR(-ENOMEM); -+ } -+ memset(path, 0, sizeof(struct ext3_ext_path) * (depth + 1)); -+ path[0].p_hdr = eh; -+ -+ /* walk through the tree */ -+ while (i) { -+ ext_debug(tree, "depth %d: num %d, max %d\n", -+ ppos, eh->eh_entries, eh->eh_max); -+ ext3_ext_binsearch_idx(tree, path + ppos, block); -+ path[ppos].p_block = path[ppos].p_idx->ei_leaf; -+ path[ppos].p_depth = i; -+ path[ppos].p_ext = NULL; -+ -+ bh = sb_bread(tree->inode->i_sb, path[ppos].p_block); -+ if (!bh) { -+ ext3_ext_drop_refs(path); -+ kfree(path); -+ return ERR_PTR(-EIO); -+ } -+ eh = EXT_BLOCK_HDR(bh); -+ ppos++; -+ EXT_ASSERT(ppos <= depth); -+ path[ppos].p_bh = bh; -+ path[ppos].p_hdr = eh; -+ i--; -+ } -+ -+ path[ppos].p_depth = i; -+ path[ppos].p_hdr = eh; -+ path[ppos].p_ext = NULL; -+ -+ /* find extent */ -+ ext3_ext_binsearch(tree, path + ppos, block); -+ -+ ext3_ext_show_path(tree, path); -+ -+ return path; -+} -+ -+/* -+ * insert new index [logical;ptr] into the block at cupr -+ * it check where to insert: before curp or after curp -+ */ -+static int ext3_ext_insert_index(handle_t *handle, -+ struct ext3_extents_tree *tree, -+ struct ext3_ext_path *curp, -+ int logical, int ptr) -+{ -+ struct ext3_extent_idx *ix; -+ int len, err; -+ -+ if ((err = ext3_ext_get_access(handle, tree, curp))) -+ return err; -+ -+ EXT_ASSERT(logical != curp->p_idx->ei_block); -+ len = EXT_MAX_INDEX(curp->p_hdr) - curp->p_idx; -+ if (logical > curp->p_idx->ei_block) { -+ /* insert after */ -+ if (curp->p_idx != EXT_LAST_INDEX(curp->p_hdr)) { -+ len = (len - 1) * sizeof(struct ext3_extent_idx); -+ len = len < 0 ? 0 : len; -+ ext_debug(tree, "insert new index %d after: %d. 
" -+ "move %d from 0x%p to 0x%p\n", -+ logical, ptr, len, -+ (curp->p_idx + 1), (curp->p_idx + 2)); -+ memmove(curp->p_idx + 2, curp->p_idx + 1, len); -+ } -+ ix = curp->p_idx + 1; -+ } else { -+ /* insert before */ -+ len = len * sizeof(struct ext3_extent_idx); -+ len = len < 0 ? 0 : len; -+ ext_debug(tree, "insert new index %d before: %d. " -+ "move %d from 0x%p to 0x%p\n", -+ logical, ptr, len, -+ curp->p_idx, (curp->p_idx + 1)); -+ memmove(curp->p_idx + 1, curp->p_idx, len); -+ ix = curp->p_idx; -+ } -+ -+ ix->ei_block = logical; -+ ix->ei_leaf = ptr; -+ curp->p_hdr->eh_entries++; -+ -+ EXT_ASSERT(curp->p_hdr->eh_entries <= curp->p_hdr->eh_max); -+ EXT_ASSERT(ix <= EXT_LAST_INDEX(curp->p_hdr)); -+ -+ err = ext3_ext_dirty(handle, tree, curp); -+ ext3_std_error(tree->inode->i_sb, err); -+ -+ return err; -+} -+ -+/* -+ * routine inserts new subtree into the path, using free index entry -+ * at depth 'at: -+ * - allocates all needed blocks (new leaf and all intermediate index blocks) -+ * - makes decision where to split -+ * - moves remaining extens and index entries (right to the split point) -+ * into the newly allocated blocks -+ * - initialize subtree -+ */ -+static int ext3_ext_split(handle_t *handle, struct ext3_extents_tree *tree, -+ struct ext3_ext_path *path, -+ struct ext3_extent *newext, int at) -+{ -+ struct buffer_head *bh = NULL; -+ int depth = EXT_DEPTH(tree); -+ struct ext3_extent_header *neh; -+ struct ext3_extent_idx *fidx; -+ struct ext3_extent *ex; -+ int i = at, k, m, a; -+ unsigned long newblock, oldblock, border; -+ int *ablocks = NULL; /* array of allocated blocks */ -+ int err = 0; -+ -+ /* make decision: where to split? 
*/ -+ /* FIXME: now desicion is simplest: at current extent */ -+ -+ /* if current leaf will be splitted, then we should use -+ * border from split point */ -+ EXT_ASSERT(path[depth].p_ext <= EXT_MAX_EXTENT(path[depth].p_hdr)); -+ if (path[depth].p_ext != EXT_MAX_EXTENT(path[depth].p_hdr)) { -+ border = path[depth].p_ext[1].ee_block; -+ ext_debug(tree, "leaf will be splitted." -+ " next leaf starts at %d\n", -+ (int)border); -+ } else { -+ border = newext->ee_block; -+ ext_debug(tree, "leaf will be added." -+ " next leaf starts at %d\n", -+ (int)border); -+ } -+ -+ /* -+ * if error occurs, then we break processing -+ * and turn filesystem read-only. so, index won't -+ * be inserted and tree will be in consistent -+ * state. next mount will repair buffers too -+ */ -+ -+ /* -+ * get array to track all allocated blocks -+ * we need this to handle errors and free blocks -+ * upon them -+ */ -+ ablocks = kmalloc(sizeof(unsigned long) * depth, GFP_NOFS); -+ if (!ablocks) -+ return -ENOMEM; -+ memset(ablocks, 0, sizeof(unsigned long) * depth); -+ -+ /* allocate all needed blocks */ -+ ext_debug(tree, "allocate %d blocks for indexes/leaf\n", depth - at); -+ for (a = 0; a < depth - at; a++) { -+ newblock = ext3_ext_new_block(handle, tree, path, newext, &err); -+ if (newblock == 0) -+ goto cleanup; -+ ablocks[a] = newblock; -+ } -+ -+ /* initialize new leaf */ -+ newblock = ablocks[--a]; -+ EXT_ASSERT(newblock); -+ bh = sb_getblk(tree->inode->i_sb, newblock); -+ if (!bh) { -+ err = -EIO; -+ goto cleanup; -+ } -+ lock_buffer(bh); -+ -+ if ((err = ext3_journal_get_create_access(handle, bh))) -+ goto cleanup; -+ -+ neh = EXT_BLOCK_HDR(bh); -+ neh->eh_entries = 0; -+ neh->eh_max = ext3_ext_space_block(tree); -+ neh->eh_magic = EXT3_EXT_MAGIC; -+ neh->eh_depth = 0; -+ ex = EXT_FIRST_EXTENT(neh); -+ -+ /* move remain of path[depth] to the new leaf */ -+ EXT_ASSERT(path[depth].p_hdr->eh_entries == path[depth].p_hdr->eh_max); -+ /* start copy from next extent */ -+ /* TODO: we 
could do it by single memmove */ -+ m = 0; -+ path[depth].p_ext++; -+ while (path[depth].p_ext <= -+ EXT_MAX_EXTENT(path[depth].p_hdr)) { -+ ext_debug(tree, "move %d:%d:%d in new leaf %lu\n", -+ path[depth].p_ext->ee_block, -+ path[depth].p_ext->ee_start, -+ path[depth].p_ext->ee_len, -+ newblock); -+ memmove(ex++, path[depth].p_ext++, -+ sizeof(struct ext3_extent)); -+ neh->eh_entries++; -+ m++; -+ } -+ set_buffer_uptodate(bh); -+ unlock_buffer(bh); -+ -+ if ((err = ext3_journal_dirty_metadata(handle, bh))) -+ goto cleanup; -+ brelse(bh); -+ bh = NULL; -+ -+ /* correct old leaf */ -+ if (m) { -+ if ((err = ext3_ext_get_access(handle, tree, path + depth))) -+ goto cleanup; -+ path[depth].p_hdr->eh_entries -= m; -+ if ((err = ext3_ext_dirty(handle, tree, path + depth))) -+ goto cleanup; -+ -+ } -+ -+ /* create intermediate indexes */ -+ k = depth - at - 1; -+ EXT_ASSERT(k >= 0); -+ if (k) -+ ext_debug(tree, "create %d intermediate indices\n", k); -+ /* insert new index into current index block */ -+ /* current depth stored in i var */ -+ i = depth - 1; -+ while (k--) { -+ oldblock = newblock; -+ newblock = ablocks[--a]; -+ bh = sb_getblk(tree->inode->i_sb, newblock); -+ if (!bh) { -+ err = -EIO; -+ goto cleanup; -+ } -+ lock_buffer(bh); -+ -+ if ((err = ext3_journal_get_create_access(handle, bh))) -+ goto cleanup; -+ -+ neh = EXT_BLOCK_HDR(bh); -+ neh->eh_entries = 1; -+ neh->eh_magic = EXT3_EXT_MAGIC; -+ neh->eh_max = ext3_ext_space_block_idx(tree); -+ neh->eh_depth = depth - i; -+ fidx = EXT_FIRST_INDEX(neh); -+ fidx->ei_block = border; -+ fidx->ei_leaf = oldblock; -+ -+ ext_debug(tree, "int.index at %d (block %lu): %lu -> %lu\n", -+ i, newblock, border, oldblock); -+ /* copy indexes */ -+ m = 0; -+ path[i].p_idx++; -+ -+ ext_debug(tree, "cur 0x%p, last 0x%p\n", path[i].p_idx, -+ EXT_MAX_INDEX(path[i].p_hdr)); -+ EXT_ASSERT(EXT_MAX_INDEX(path[i].p_hdr) == -+ EXT_LAST_INDEX(path[i].p_hdr)); -+ while (path[i].p_idx <= EXT_MAX_INDEX(path[i].p_hdr)) { -+ 
ext_debug(tree, "%d: move %d:%d in new index %lu\n", -+ i, path[i].p_idx->ei_block, -+ path[i].p_idx->ei_leaf, newblock); -+ memmove(++fidx, path[i].p_idx++, -+ sizeof(struct ext3_extent_idx)); -+ neh->eh_entries++; -+ EXT_ASSERT(neh->eh_entries <= neh->eh_max); -+ m++; -+ } -+ set_buffer_uptodate(bh); -+ unlock_buffer(bh); -+ -+ if ((err = ext3_journal_dirty_metadata(handle, bh))) -+ goto cleanup; -+ brelse(bh); -+ bh = NULL; -+ -+ /* correct old index */ -+ if (m) { -+ err = ext3_ext_get_access(handle, tree, path + i); -+ if (err) -+ goto cleanup; -+ path[i].p_hdr->eh_entries -= m; -+ err = ext3_ext_dirty(handle, tree, path + i); -+ if (err) -+ goto cleanup; -+ } -+ -+ i--; -+ } -+ -+ /* insert new index */ -+ if (!err) -+ err = ext3_ext_insert_index(handle, tree, path + at, -+ border, newblock); -+ -+cleanup: -+ if (bh) { -+ if (buffer_locked(bh)) -+ unlock_buffer(bh); -+ brelse(bh); -+ } -+ -+ if (err) { -+ /* free all allocated blocks in error case */ -+ for (i = 0; i < depth; i++) { -+ if (!ablocks[i]) -+ continue; -+ ext3_free_blocks(handle, tree->inode, ablocks[i], 1); -+ } -+ } -+ kfree(ablocks); -+ -+ return err; -+} -+ -+/* -+ * routine implements tree growing procedure: -+ * - allocates new block -+ * - moves top-level data (index block or leaf) into the new block -+ * - initialize new top-level, creating index that points to the -+ * just created block -+ */ -+static int ext3_ext_grow_indepth(handle_t *handle, -+ struct ext3_extents_tree *tree, -+ struct ext3_ext_path *path, -+ struct ext3_extent *newext) -+{ -+ struct ext3_ext_path *curp = path; -+ struct ext3_extent_header *neh; -+ struct ext3_extent_idx *fidx; -+ struct buffer_head *bh; -+ unsigned long newblock; -+ int err = 0; -+ -+ newblock = ext3_ext_new_block(handle, tree, path, newext, &err); -+ if (newblock == 0) -+ return err; -+ -+ bh = sb_getblk(tree->inode->i_sb, newblock); -+ if (!bh) { -+ err = -EIO; -+ ext3_std_error(tree->inode->i_sb, err); -+ return err; -+ } -+ lock_buffer(bh); -+ 
-+ if ((err = ext3_journal_get_create_access(handle, bh))) { -+ unlock_buffer(bh); -+ goto out; -+ } -+ -+ /* move top-level index/leaf into new block */ -+ memmove(bh->b_data, curp->p_hdr, tree->buffer_len); -+ -+ /* set size of new block */ -+ neh = EXT_BLOCK_HDR(bh); -+ /* old root could have indexes or leaves -+ * so calculate e_max right way */ -+ if (EXT_DEPTH(tree)) -+ neh->eh_max = ext3_ext_space_block_idx(tree); -+ else -+ neh->eh_max = ext3_ext_space_block(tree); -+ neh->eh_magic = EXT3_EXT_MAGIC; -+ set_buffer_uptodate(bh); -+ unlock_buffer(bh); -+ -+ if ((err = ext3_journal_dirty_metadata(handle, bh))) -+ goto out; -+ -+ /* create index in new top-level index: num,max,pointer */ -+ if ((err = ext3_ext_get_access(handle, tree, curp))) -+ goto out; -+ -+ curp->p_hdr->eh_magic = EXT3_EXT_MAGIC; -+ curp->p_hdr->eh_max = ext3_ext_space_root_idx(tree); -+ curp->p_hdr->eh_entries = 1; -+ curp->p_idx = EXT_FIRST_INDEX(curp->p_hdr); -+ /* FIXME: it works, but actually path[0] can be index */ -+ curp->p_idx->ei_block = EXT_FIRST_EXTENT(path[0].p_hdr)->ee_block; -+ curp->p_idx->ei_leaf = newblock; -+ -+ neh = EXT_ROOT_HDR(tree); -+ fidx = EXT_FIRST_INDEX(neh); -+ ext_debug(tree, "new root: num %d(%d), lblock %d, ptr %d\n", -+ neh->eh_entries, neh->eh_max, fidx->ei_block, fidx->ei_leaf); -+ -+ neh->eh_depth = path->p_depth + 1; -+ err = ext3_ext_dirty(handle, tree, curp); -+out: -+ brelse(bh); -+ -+ return err; -+} -+ -+/* -+ * routine finds empty index and adds new leaf. 
if no free index found -+ * then it requests in-depth growing -+ */ -+static int ext3_ext_create_new_leaf(handle_t *handle, -+ struct ext3_extents_tree *tree, -+ struct ext3_ext_path *path, -+ struct ext3_extent *newext) -+{ -+ struct ext3_ext_path *curp; -+ int depth, i, err = 0; -+ -+repeat: -+ i = depth = EXT_DEPTH(tree); -+ -+ /* walk up to the tree and look for free index entry */ -+ curp = path + depth; -+ while (i > 0 && !EXT_HAS_FREE_INDEX(curp)) { -+ i--; -+ curp--; -+ } -+ -+ /* we use already allocated block for index block -+ * so, subsequent data blocks should be contigoues */ -+ if (EXT_HAS_FREE_INDEX(curp)) { -+ /* if we found index with free entry, then use that -+ * entry: create all needed subtree and add new leaf */ -+ err = ext3_ext_split(handle, tree, path, newext, i); -+ -+ /* refill path */ -+ ext3_ext_drop_refs(path); -+ path = ext3_ext_find_extent(tree, newext->ee_block, path); -+ if (IS_ERR(path)) -+ err = PTR_ERR(path); -+ } else { -+ /* tree is full, time to grow in depth */ -+ err = ext3_ext_grow_indepth(handle, tree, path, newext); -+ -+ /* refill path */ -+ ext3_ext_drop_refs(path); -+ path = ext3_ext_find_extent(tree, newext->ee_block, path); -+ if (IS_ERR(path)) -+ err = PTR_ERR(path); -+ -+ /* -+ * only first (depth 0 -> 1) produces free space -+ * in all other cases we have to split growed tree -+ */ -+ depth = EXT_DEPTH(tree); -+ if (path[depth].p_hdr->eh_entries == path[depth].p_hdr->eh_max) { -+ /* now we need split */ -+ goto repeat; -+ } -+ } -+ -+ if (err) -+ return err; -+ -+ return 0; -+} -+ -+/* -+ * returns allocated block in subsequent extent or EXT_MAX_BLOCK -+ * NOTE: it consider block number from index entry as -+ * allocated block. 
thus, index entries have to be consistent -+ * with leafs -+ */ -+static unsigned long -+ext3_ext_next_allocated_block(struct ext3_ext_path *path) -+{ -+ int depth; -+ -+ EXT_ASSERT(path != NULL); -+ depth = path->p_depth; -+ -+ if (depth == 0 && path->p_ext == NULL) -+ return EXT_MAX_BLOCK; -+ -+ /* FIXME: what if index isn't full ?! */ -+ while (depth >= 0) { -+ if (depth == path->p_depth) { -+ /* leaf */ -+ if (path[depth].p_ext != -+ EXT_LAST_EXTENT(path[depth].p_hdr)) -+ return path[depth].p_ext[1].ee_block; -+ } else { -+ /* index */ -+ if (path[depth].p_idx != -+ EXT_LAST_INDEX(path[depth].p_hdr)) -+ return path[depth].p_idx[1].ei_block; -+ } -+ depth--; -+ } -+ -+ return EXT_MAX_BLOCK; -+} -+ -+/* -+ * returns first allocated block from next leaf or EXT_MAX_BLOCK -+ */ -+static unsigned ext3_ext_next_leaf_block(struct ext3_extents_tree *tree, -+ struct ext3_ext_path *path) -+{ -+ int depth; -+ -+ EXT_ASSERT(path != NULL); -+ depth = path->p_depth; -+ -+ /* zero-tree has no leaf blocks at all */ -+ if (depth == 0) -+ return EXT_MAX_BLOCK; -+ -+ /* go to index block */ -+ depth--; -+ -+ while (depth >= 0) { -+ if (path[depth].p_idx != -+ EXT_LAST_INDEX(path[depth].p_hdr)) -+ return path[depth].p_idx[1].ei_block; -+ depth--; -+ } -+ -+ return EXT_MAX_BLOCK; -+} -+ -+/* -+ * if leaf gets modified and modified extent is first in the leaf -+ * then we have to correct all indexes above -+ * TODO: do we need to correct tree in all cases? 
-+ */ -+int ext3_ext_correct_indexes(handle_t *handle, struct ext3_extents_tree *tree, -+ struct ext3_ext_path *path) -+{ -+ struct ext3_extent_header *eh; -+ int depth = EXT_DEPTH(tree); -+ struct ext3_extent *ex; -+ unsigned long border; -+ int k, err = 0; -+ -+ eh = path[depth].p_hdr; -+ ex = path[depth].p_ext; -+ EXT_ASSERT(ex); -+ EXT_ASSERT(eh); -+ -+ if (depth == 0) { -+ /* there is no tree at all */ -+ return 0; -+ } -+ -+ if (ex != EXT_FIRST_EXTENT(eh)) { -+ /* we correct tree if first leaf got modified only */ -+ return 0; -+ } -+ -+ /* -+ * TODO: we need correction if border is smaller then current one -+ */ -+ k = depth - 1; -+ border = path[depth].p_ext->ee_block; -+ if ((err = ext3_ext_get_access(handle, tree, path + k))) -+ return err; -+ path[k].p_idx->ei_block = border; -+ if ((err = ext3_ext_dirty(handle, tree, path + k))) -+ return err; -+ -+ while (k--) { -+ /* change all left-side indexes */ -+ if (path[k+1].p_idx != EXT_FIRST_INDEX(path[k+1].p_hdr)) -+ break; -+ if ((err = ext3_ext_get_access(handle, tree, path + k))) -+ break; -+ path[k].p_idx->ei_block = border; -+ if ((err = ext3_ext_dirty(handle, tree, path + k))) -+ break; -+ } -+ -+ return err; -+} -+ -+static int inline -+ext3_can_extents_be_merged(struct ext3_extents_tree *tree, -+ struct ext3_extent *ex1, -+ struct ext3_extent *ex2) -+{ -+ if (ex1->ee_block + ex1->ee_len != ex2->ee_block) -+ return 0; -+ -+#ifdef AGRESSIVE_TEST -+ if (ex1->ee_len >= 4) -+ return 0; -+#endif -+ -+ if (!tree->ops->mergable) -+ return 1; -+ -+ return tree->ops->mergable(ex1, ex2); -+} -+ -+/* -+ * this routine tries to merge requsted extent into the existing -+ * extent or inserts requested extent as new one into the tree, -+ * creating new leaf in no-space case -+ */ -+int ext3_ext_insert_extent(handle_t *handle, struct ext3_extents_tree *tree, -+ struct ext3_ext_path *path, -+ struct ext3_extent *newext) -+{ -+ struct ext3_extent_header * eh; -+ struct ext3_extent *ex, *fex; -+ struct ext3_extent 
*nearex; /* nearest extent */ -+ struct ext3_ext_path *npath = NULL; -+ int depth, len, err, next; -+ -+ EXT_ASSERT(newext->ee_len > 0); -+ EXT_ASSERT(newext->ee_len < EXT_CACHE_MARK); -+ depth = EXT_DEPTH(tree); -+ ex = path[depth].p_ext; -+ EXT_ASSERT(path[depth].p_hdr); -+ -+ /* try to insert block into found extent and return */ -+ if (ex && ext3_can_extents_be_merged(tree, ex, newext)) { -+ ext_debug(tree, "append %d block to %d:%d (from %d)\n", -+ newext->ee_len, ex->ee_block, ex->ee_len, -+ ex->ee_start); -+ if ((err = ext3_ext_get_access(handle, tree, path + depth))) -+ return err; -+ ex->ee_len += newext->ee_len; -+ eh = path[depth].p_hdr; -+ nearex = ex; -+ goto merge; -+ } -+ -+repeat: -+ depth = EXT_DEPTH(tree); -+ eh = path[depth].p_hdr; -+ if (eh->eh_entries < eh->eh_max) -+ goto has_space; -+ -+ /* probably next leaf has space for us? */ -+ fex = EXT_LAST_EXTENT(eh); -+ next = ext3_ext_next_leaf_block(tree, path); -+ if (newext->ee_block > fex->ee_block && next != EXT_MAX_BLOCK) { -+ ext_debug(tree, "next leaf block - %d\n", next); -+ EXT_ASSERT(!npath); -+ npath = ext3_ext_find_extent(tree, next, NULL); -+ if (IS_ERR(npath)) -+ return PTR_ERR(npath); -+ EXT_ASSERT(npath->p_depth == path->p_depth); -+ eh = npath[depth].p_hdr; -+ if (eh->eh_entries < eh->eh_max) { -+ ext_debug(tree, "next leaf isnt full(%d)\n", -+ eh->eh_entries); -+ path = npath; -+ goto repeat; -+ } -+ ext_debug(tree, "next leaf hasno free space(%d,%d)\n", -+ eh->eh_entries, eh->eh_max); -+ } -+ -+ /* -+ * there is no free space in found leaf -+ * we're gonna add new leaf in the tree -+ */ -+ err = ext3_ext_create_new_leaf(handle, tree, path, newext); -+ if (err) -+ goto cleanup; -+ depth = EXT_DEPTH(tree); -+ eh = path[depth].p_hdr; -+ -+has_space: -+ nearex = path[depth].p_ext; -+ -+ if ((err = ext3_ext_get_access(handle, tree, path + depth))) -+ goto cleanup; -+ -+ if (!nearex) { -+ /* there is no extent in this leaf, create first one */ -+ ext_debug(tree, "first extent in the 
leaf: %d:%d:%d\n", -+ newext->ee_block, newext->ee_start, -+ newext->ee_len); -+ path[depth].p_ext = EXT_FIRST_EXTENT(eh); -+ } else if (newext->ee_block > nearex->ee_block) { -+ EXT_ASSERT(newext->ee_block != nearex->ee_block); -+ if (nearex != EXT_LAST_EXTENT(eh)) { -+ len = EXT_MAX_EXTENT(eh) - nearex; -+ len = (len - 1) * sizeof(struct ext3_extent); -+ len = len < 0 ? 0 : len; -+ ext_debug(tree, "insert %d:%d:%d after: nearest 0x%p, " -+ "move %d from 0x%p to 0x%p\n", -+ newext->ee_block, newext->ee_start, -+ newext->ee_len, -+ nearex, len, nearex + 1, nearex + 2); -+ memmove(nearex + 2, nearex + 1, len); -+ } -+ path[depth].p_ext = nearex + 1; -+ } else { -+ EXT_ASSERT(newext->ee_block != nearex->ee_block); -+ len = (EXT_MAX_EXTENT(eh) - nearex) * sizeof(struct ext3_extent); -+ len = len < 0 ? 0 : len; -+ ext_debug(tree, "insert %d:%d:%d before: nearest 0x%p, " -+ "move %d from 0x%p to 0x%p\n", -+ newext->ee_block, newext->ee_start, newext->ee_len, -+ nearex, len, nearex + 1, nearex + 2); -+ memmove(nearex + 1, nearex, len); -+ path[depth].p_ext = nearex; -+ } -+ -+ eh->eh_entries++; -+ nearex = path[depth].p_ext; -+ nearex->ee_block = newext->ee_block; -+ nearex->ee_start = newext->ee_start; -+ nearex->ee_len = newext->ee_len; -+ /* FIXME: support for large fs */ -+ nearex->ee_start_hi = 0; -+ -+merge: -+ /* try to merge extents to the right */ -+ while (nearex < EXT_LAST_EXTENT(eh)) { -+ if (!ext3_can_extents_be_merged(tree, nearex, nearex + 1)) -+ break; -+ /* merge with next extent! 
*/ -+ nearex->ee_len += nearex[1].ee_len; -+ if (nearex + 1 < EXT_LAST_EXTENT(eh)) { -+ len = (EXT_LAST_EXTENT(eh) - nearex - 1) -+ * sizeof(struct ext3_extent); -+ memmove(nearex + 1, nearex + 2, len); -+ } -+ eh->eh_entries--; -+ EXT_ASSERT(eh->eh_entries > 0); -+ } -+ -+ /* try to merge extents to the left */ -+ -+ /* time to correct all indexes above */ -+ err = ext3_ext_correct_indexes(handle, tree, path); -+ if (err) -+ goto cleanup; -+ -+ err = ext3_ext_dirty(handle, tree, path + depth); -+ -+cleanup: -+ if (npath) { -+ ext3_ext_drop_refs(npath); -+ kfree(npath); -+ } -+ ext3_ext_tree_changed(tree); -+ ext3_ext_invalidate_cache(tree); -+ return err; -+} -+ -+int ext3_ext_walk_space(struct ext3_extents_tree *tree, unsigned long block, -+ unsigned long num, ext_prepare_callback func) -+{ -+ struct ext3_ext_path *path = NULL; -+ struct ext3_extent *ex, cbex; -+ unsigned long next, start = 0, end = 0; -+ unsigned long last = block + num; -+ int depth, exists, err = 0; -+ -+ EXT_ASSERT(tree); -+ EXT_ASSERT(func); -+ EXT_ASSERT(tree->inode); -+ EXT_ASSERT(tree->root); -+ -+ while (block < last && block != EXT_MAX_BLOCK) { -+ num = last - block; -+ /* find extent for this block */ -+ path = ext3_ext_find_extent(tree, block, path); -+ if (IS_ERR(path)) { -+ err = PTR_ERR(path); -+ path = NULL; -+ break; -+ } -+ -+ depth = EXT_DEPTH(tree); -+ EXT_ASSERT(path[depth].p_hdr); -+ ex = path[depth].p_ext; -+ next = ext3_ext_next_allocated_block(path); -+ -+ exists = 0; -+ if (!ex) { -+ /* there is no extent yet, so try to allocate -+ * all requested space */ -+ start = block; -+ end = block + num; -+ } else if (ex->ee_block > block) { -+ /* need to allocate space before found extent */ -+ start = block; -+ end = ex->ee_block; -+ if (block + num < end) -+ end = block + num; -+ } else if (block >= ex->ee_block + ex->ee_len) { -+ /* need to allocate space after found extent */ -+ start = block; -+ end = block + num; -+ if (end >= next) -+ end = next; -+ } else if (block >= 
ex->ee_block) { -+ /* -+ * some part of requested space is covered -+ * by found extent -+ */ -+ start = block; -+ end = ex->ee_block + ex->ee_len; -+ if (block + num < end) -+ end = block + num; -+ exists = 1; -+ } else { -+ BUG(); -+ } -+ EXT_ASSERT(end > start); -+ -+ if (!exists) { -+ cbex.ee_block = start; -+ cbex.ee_len = end - start; -+ cbex.ee_start = 0; -+ } else -+ cbex = *ex; -+ -+ EXT_ASSERT(path[depth].p_hdr); -+ err = func(tree, path, &cbex, exists); -+ ext3_ext_drop_refs(path); -+ -+ if (err < 0) -+ break; -+ if (err == EXT_REPEAT) -+ continue; -+ else if (err == EXT_BREAK) { -+ err = 0; -+ break; -+ } -+ -+ if (EXT_DEPTH(tree) != depth) { -+ /* depth was changed. we have to realloc path */ -+ kfree(path); -+ path = NULL; -+ } -+ -+ block = cbex.ee_block + cbex.ee_len; -+ } -+ -+ if (path) { -+ ext3_ext_drop_refs(path); -+ kfree(path); -+ } -+ -+ return err; -+} -+ -+static inline void -+ext3_ext_put_in_cache(struct ext3_extents_tree *tree, struct ext3_extent *ex) -+{ -+ if (tree->cex) { -+ EXT_ASSERT(ex); -+ EXT_ASSERT(ex->ee_len); -+ tree->cex->ee_block = ex->ee_block; -+ tree->cex->ee_start = ex->ee_start; -+ tree->cex->ee_len = ex->ee_len; -+ } -+} -+ -+/* -+ * this routine calculate boundaries of the gap requested block fits into -+ * and cache this gap -+ */ -+static inline void -+ext3_ext_put_gap_in_cache(struct ext3_extents_tree *tree, -+ struct ext3_ext_path *path, -+ unsigned long block) -+{ -+ int depth = EXT_DEPTH(tree); -+ struct ext3_extent *ex, gex; -+ -+ if (!tree->cex) -+ return; -+ -+ ex = path[depth].p_ext; -+ if (ex == NULL) { -+ /* there is no extent yet, so gap is [0;-] */ -+ gex.ee_block = 0; -+ gex.ee_len = EXT_CACHE_MARK; -+ ext_debug(tree, "cache gap(whole file):"); -+ } else if (block < ex->ee_block) { -+ gex.ee_block = block; -+ gex.ee_len = ex->ee_block - block; -+ ext_debug(tree, "cache gap(before): %lu [%lu:%lu]", -+ (unsigned long) block, -+ (unsigned long) ex->ee_block, -+ (unsigned long) ex->ee_len); -+ } else if 
(block >= ex->ee_block + ex->ee_len) { -+ gex.ee_block = ex->ee_block + ex->ee_len; -+ gex.ee_len = ext3_ext_next_allocated_block(path); -+ ext_debug(tree, "cache gap(after): [%lu:%lu] %lu", -+ (unsigned long) ex->ee_block, -+ (unsigned long) ex->ee_len, -+ (unsigned long) block); -+ EXT_ASSERT(gex.ee_len > gex.ee_block); -+ gex.ee_len = gex.ee_len - gex.ee_block; -+ } else { -+ BUG(); -+ } -+ -+ ext_debug(tree, " -> %lu:%lu\n", (unsigned long) gex.ee_block, -+ (unsigned long) gex.ee_len); -+ gex.ee_start = EXT_CACHE_MARK; -+ ext3_ext_put_in_cache(tree, &gex); -+} -+ -+static inline int -+ext3_ext_in_cache(struct ext3_extents_tree *tree, unsigned long block, -+ struct ext3_extent *ex) -+{ -+ struct ext3_extent *cex = tree->cex; -+ -+ /* is there cache storage at all? */ -+ if (!cex) -+ return 0; -+ -+ /* has cache valid data? */ -+ if (cex->ee_len == 0) -+ return 0; -+ -+ if (block >= cex->ee_block && block < cex->ee_block + cex->ee_len) { -+ ex->ee_block = cex->ee_block; -+ ex->ee_start = cex->ee_start; -+ ex->ee_len = cex->ee_len; -+ ext_debug(tree, "%lu cached by %lu:%lu:%lu\n", -+ (unsigned long) block, -+ (unsigned long) ex->ee_block, -+ (unsigned long) ex->ee_len, -+ (unsigned long) ex->ee_start); -+ return 1; -+ } -+ -+ /* not in cache */ -+ return 0; -+} -+ -+/* -+ * routine removes index from the index block -+ * it's used in truncate case only. 
thus all requests are for -+ * last index in the block only -+ */ -+int ext3_ext_rm_idx(handle_t *handle, struct ext3_extents_tree *tree, -+ struct ext3_ext_path *path) -+{ -+ struct buffer_head *bh; -+ int err; -+ -+ /* free index block */ -+ path--; -+ EXT_ASSERT(path->p_hdr->eh_entries); -+ if ((err = ext3_ext_get_access(handle, tree, path))) -+ return err; -+ path->p_hdr->eh_entries--; -+ if ((err = ext3_ext_dirty(handle, tree, path))) -+ return err; -+ ext_debug(tree, "index is empty, remove it, free block %d\n", -+ path->p_idx->ei_leaf); -+ bh = sb_find_get_block(tree->inode->i_sb, path->p_idx->ei_leaf); -+ ext3_forget(handle, 1, tree->inode, bh, path->p_idx->ei_leaf); -+ ext3_free_blocks(handle, tree->inode, path->p_idx->ei_leaf, 1); -+ return err; -+} -+ -+int ext3_ext_calc_credits_for_insert(struct ext3_extents_tree *tree, -+ struct ext3_ext_path *path) -+{ -+ int depth = EXT_DEPTH(tree); -+ int needed; -+ -+ if (path) { -+ /* probably there is space in leaf? */ -+ if (path[depth].p_hdr->eh_entries < path[depth].p_hdr->eh_max) -+ return 1; -+ } -+ -+ /* -+ * the worste case we're expecting is creation of the -+ * new root (growing in depth) with index splitting -+ * for splitting we have to consider depth + 1 because -+ * previous growing could increase it -+ */ -+ depth = depth + 1; -+ -+ /* -+ * growing in depth: -+ * block allocation + new root + old root -+ */ -+ needed = EXT3_ALLOC_NEEDED + 2; -+ -+ /* index split. 
we may need: -+ * allocate intermediate indexes and new leaf -+ * change two blocks at each level, but root -+ * modify root block (inode) -+ */ -+ needed += (depth * EXT3_ALLOC_NEEDED) + (2 * depth) + 1; -+ -+ return needed; -+} -+ -+static int -+ext3_ext_split_for_rm(handle_t *handle, struct ext3_extents_tree *tree, -+ struct ext3_ext_path *path, unsigned long start, -+ unsigned long end) -+{ -+ struct ext3_extent *ex, tex; -+ struct ext3_ext_path *npath; -+ int depth, creds, err; -+ -+ depth = EXT_DEPTH(tree); -+ ex = path[depth].p_ext; -+ EXT_ASSERT(ex); -+ EXT_ASSERT(end < ex->ee_block + ex->ee_len - 1); -+ EXT_ASSERT(ex->ee_block < start); -+ -+ /* calculate tail extent */ -+ tex.ee_block = end + 1; -+ EXT_ASSERT(tex.ee_block < ex->ee_block + ex->ee_len); -+ tex.ee_len = ex->ee_block + ex->ee_len - tex.ee_block; -+ -+ creds = ext3_ext_calc_credits_for_insert(tree, path); -+ handle = ext3_ext_journal_restart(handle, creds); -+ if (IS_ERR(handle)) -+ return PTR_ERR(handle); -+ -+ /* calculate head extent. use primary extent */ -+ err = ext3_ext_get_access(handle, tree, path + depth); -+ if (err) -+ return err; -+ ex->ee_len = start - ex->ee_block; -+ err = ext3_ext_dirty(handle, tree, path + depth); -+ if (err) -+ return err; -+ -+ /* FIXME: some callback to free underlying resource -+ * and correct ee_start? 
*/ -+ ext_debug(tree, "split extent: head %u:%u, tail %u:%u\n", -+ ex->ee_block, ex->ee_len, tex.ee_block, tex.ee_len); -+ -+ npath = ext3_ext_find_extent(tree, ex->ee_block, NULL); -+ if (IS_ERR(npath)) -+ return PTR_ERR(npath); -+ depth = EXT_DEPTH(tree); -+ EXT_ASSERT(npath[depth].p_ext->ee_block == ex->ee_block); -+ EXT_ASSERT(npath[depth].p_ext->ee_len == ex->ee_len); -+ -+ err = ext3_ext_insert_extent(handle, tree, npath, &tex); -+ ext3_ext_drop_refs(npath); -+ kfree(npath); -+ -+ return err; -+ -+} -+ -+static int -+ext3_ext_rm_leaf(handle_t *handle, struct ext3_extents_tree *tree, -+ struct ext3_ext_path *path, unsigned long start, -+ unsigned long end) -+{ -+ struct ext3_extent *ex, *fu = NULL, *lu, *le; -+ int err = 0, correct_index = 0; -+ int depth = EXT_DEPTH(tree), credits; -+ struct ext3_extent_header *eh; -+ unsigned a, b, block, num; -+ -+ ext_debug(tree, "remove [%lu:%lu] in leaf\n", start, end); -+ if (!path[depth].p_hdr) -+ path[depth].p_hdr = EXT_BLOCK_HDR(path[depth].p_bh); -+ eh = path[depth].p_hdr; -+ EXT_ASSERT(eh); -+ EXT_ASSERT(eh->eh_entries <= eh->eh_max); -+ EXT_ASSERT(eh->eh_magic == EXT3_EXT_MAGIC); -+ -+ /* find where to start removing */ -+ le = ex = EXT_LAST_EXTENT(eh); -+ while (ex != EXT_FIRST_EXTENT(eh)) { -+ if (ex->ee_block <= end) -+ break; -+ ex--; -+ } -+ -+ if (start > ex->ee_block && end < ex->ee_block + ex->ee_len - 1) { -+ /* removal of internal part of the extent requested -+ * tail and head must be placed in different extent -+ * so, we have to insert one more extent */ -+ path[depth].p_ext = ex; -+ return ext3_ext_split_for_rm(handle, tree, path, start, end); -+ } -+ -+ lu = ex; -+ while (ex >= EXT_FIRST_EXTENT(eh) && -+ ex->ee_block + ex->ee_len > start) { -+ ext_debug(tree, "remove ext %u:%u\n", ex->ee_block, ex->ee_len); -+ path[depth].p_ext = ex; -+ -+ a = ex->ee_block > start ? ex->ee_block : start; -+ b = ex->ee_block + ex->ee_len - 1 < end ? 
-+ ex->ee_block + ex->ee_len - 1 : end; -+ -+ ext_debug(tree, " border %u:%u\n", a, b); -+ -+ if (a != ex->ee_block && b != ex->ee_block + ex->ee_len - 1) { -+ block = 0; -+ num = 0; -+ BUG(); -+ } else if (a != ex->ee_block) { -+ /* remove tail of the extent */ -+ block = ex->ee_block; -+ num = a - block; -+ } else if (b != ex->ee_block + ex->ee_len - 1) { -+ /* remove head of the extent */ -+ block = a; -+ num = b - a; -+ } else { -+ /* remove whole extent: excelent! */ -+ block = ex->ee_block; -+ num = 0; -+ EXT_ASSERT(a == ex->ee_block && -+ b == ex->ee_block + ex->ee_len - 1); -+ } -+ -+ if (ex == EXT_FIRST_EXTENT(eh)) -+ correct_index = 1; -+ -+ credits = 1; -+ if (correct_index) -+ credits += (EXT_DEPTH(tree) * EXT3_ALLOC_NEEDED) + 1; -+ if (tree->ops->remove_extent_credits) -+ credits+=tree->ops->remove_extent_credits(tree,ex,a,b); -+ -+ handle = ext3_ext_journal_restart(handle, credits); -+ if (IS_ERR(handle)) { -+ err = PTR_ERR(handle); -+ goto out; -+ } -+ -+ err = ext3_ext_get_access(handle, tree, path + depth); -+ if (err) -+ goto out; -+ -+ if (tree->ops->remove_extent) -+ err = tree->ops->remove_extent(tree, ex, a, b); -+ if (err) -+ goto out; -+ -+ if (num == 0) { -+ /* this extent is removed entirely mark slot unused */ -+ ex->ee_start = 0; -+ eh->eh_entries--; -+ fu = ex; -+ } -+ -+ ex->ee_block = block; -+ ex->ee_len = num; -+ -+ err = ext3_ext_dirty(handle, tree, path + depth); -+ if (err) -+ goto out; -+ -+ ext_debug(tree, "new extent: %u:%u:%u\n", -+ ex->ee_block, ex->ee_len, ex->ee_start); -+ ex--; -+ } -+ -+ if (fu) { -+ /* reuse unused slots */ -+ while (lu < le) { -+ if (lu->ee_start) { -+ *fu = *lu; -+ lu->ee_start = 0; -+ fu++; -+ } -+ lu++; -+ } -+ } -+ -+ if (correct_index && eh->eh_entries) -+ err = ext3_ext_correct_indexes(handle, tree, path); -+ -+ /* if this leaf is free, then we should -+ * remove it from index block above */ -+ if (err == 0 && eh->eh_entries == 0 && path[depth].p_bh != NULL) -+ err = ext3_ext_rm_idx(handle, tree, 
path + depth); -+ -+out: -+ return err; -+} -+ -+ -+static struct ext3_extent_idx * -+ext3_ext_last_covered(struct ext3_extent_header *hdr, unsigned long block) -+{ -+ struct ext3_extent_idx *ix; -+ -+ ix = EXT_LAST_INDEX(hdr); -+ while (ix != EXT_FIRST_INDEX(hdr)) { -+ if (ix->ei_block <= block) -+ break; -+ ix--; -+ } -+ return ix; -+} -+ -+/* -+ * returns 1 if current index have to be freed (even partial) -+ */ -+static int inline -+ext3_ext_more_to_rm(struct ext3_ext_path *path) -+{ -+ EXT_ASSERT(path->p_idx); -+ -+ if (path->p_idx < EXT_FIRST_INDEX(path->p_hdr)) -+ return 0; -+ -+ /* -+ * if truncate on deeper level happened it it wasn't partial -+ * so we have to consider current index for truncation -+ */ -+ if (path->p_hdr->eh_entries == path->p_block) -+ return 0; -+ return 1; -+} -+ -+int ext3_ext_remove_space(struct ext3_extents_tree *tree, -+ unsigned long start, unsigned long end) -+{ -+ struct inode *inode = tree->inode; -+ struct super_block *sb = inode->i_sb; -+ int depth = EXT_DEPTH(tree); -+ struct ext3_ext_path *path; -+ handle_t *handle; -+ int i = 0, err = 0; -+ -+ ext_debug(tree, "space to be removed: %lu:%lu\n", start, end); -+ -+ /* probably first extent we're gonna free will be last in block */ -+ handle = ext3_journal_start(inode, depth + 1); -+ if (IS_ERR(handle)) -+ return PTR_ERR(handle); -+ -+ ext3_ext_invalidate_cache(tree); -+ -+ /* -+ * we start scanning from right side freeing all the blocks -+ * after i_size and walking into the deep -+ */ -+ path = kmalloc(sizeof(struct ext3_ext_path) * (depth + 1), GFP_KERNEL); -+ if (IS_ERR(path)) { -+ ext3_error(sb, "ext3_ext_remove_space", -+ "Can't allocate path array"); -+ ext3_journal_stop(handle); -+ return -ENOMEM; -+ } -+ memset(path, 0, sizeof(struct ext3_ext_path) * (depth + 1)); -+ path[i].p_hdr = EXT_ROOT_HDR(tree); -+ -+ while (i >= 0 && err == 0) { -+ if (i == depth) { -+ /* this is leaf block */ -+ err = ext3_ext_rm_leaf(handle, tree, path, start, end); -+ /* root level have p_bh 
== NULL, brelse() eats this */ -+ brelse(path[i].p_bh); -+ i--; -+ continue; -+ } -+ -+ /* this is index block */ -+ if (!path[i].p_hdr) { -+ ext_debug(tree, "initialize header\n"); -+ path[i].p_hdr = EXT_BLOCK_HDR(path[i].p_bh); -+ } -+ -+ EXT_ASSERT(path[i].p_hdr->eh_entries <= path[i].p_hdr->eh_max); -+ EXT_ASSERT(path[i].p_hdr->eh_magic == EXT3_EXT_MAGIC); -+ -+ if (!path[i].p_idx) { -+ /* this level hasn't touched yet */ -+ path[i].p_idx = -+ ext3_ext_last_covered(path[i].p_hdr, end); -+ path[i].p_block = path[i].p_hdr->eh_entries + 1; -+ ext_debug(tree, "init index ptr: hdr 0x%p, num %d\n", -+ path[i].p_hdr, path[i].p_hdr->eh_entries); -+ } else { -+ /* we've already was here, see at next index */ -+ path[i].p_idx--; -+ } -+ -+ ext_debug(tree, "level %d - index, first 0x%p, cur 0x%p\n", -+ i, EXT_FIRST_INDEX(path[i].p_hdr), -+ path[i].p_idx); -+ if (ext3_ext_more_to_rm(path + i)) { -+ /* go to the next level */ -+ ext_debug(tree, "move to level %d (block %d)\n", -+ i + 1, path[i].p_idx->ei_leaf); -+ memset(path + i + 1, 0, sizeof(*path)); -+ path[i+1].p_bh = sb_bread(sb, path[i].p_idx->ei_leaf); -+ if (!path[i+1].p_bh) { -+ /* should we reset i_size? 
*/ -+ err = -EIO; -+ break; -+ } -+ /* put actual number of indexes to know is this -+ * number got changed at the next iteration */ -+ path[i].p_block = path[i].p_hdr->eh_entries; -+ i++; -+ } else { -+ /* we finish processing this index, go up */ -+ if (path[i].p_hdr->eh_entries == 0 && i > 0) { -+ /* index is empty, remove it -+ * handle must be already prepared by the -+ * truncatei_leaf() */ -+ err = ext3_ext_rm_idx(handle, tree, path + i); -+ } -+ /* root level have p_bh == NULL, brelse() eats this */ -+ brelse(path[i].p_bh); -+ i--; -+ ext_debug(tree, "return to level %d\n", i); -+ } -+ } -+ -+ /* TODO: flexible tree reduction should be here */ -+ if (path->p_hdr->eh_entries == 0) { -+ /* -+ * truncate to zero freed all the tree -+ * so, we need to correct eh_depth -+ */ -+ err = ext3_ext_get_access(handle, tree, path); -+ if (err == 0) { -+ EXT_ROOT_HDR(tree)->eh_depth = 0; -+ EXT_ROOT_HDR(tree)->eh_max = ext3_ext_space_root(tree); -+ err = ext3_ext_dirty(handle, tree, path); -+ } -+ } -+ ext3_ext_tree_changed(tree); -+ -+ kfree(path); -+ ext3_journal_stop(handle); -+ -+ return err; -+} -+ -+int ext3_ext_calc_metadata_amount(struct ext3_extents_tree *tree, int blocks) -+{ -+ int lcap, icap, rcap, leafs, idxs, num; -+ -+ rcap = ext3_ext_space_root(tree); -+ if (blocks <= rcap) { -+ /* all extents fit to the root */ -+ return 0; -+ } -+ -+ rcap = ext3_ext_space_root_idx(tree); -+ lcap = ext3_ext_space_block(tree); -+ icap = ext3_ext_space_block_idx(tree); -+ -+ num = leafs = (blocks + lcap - 1) / lcap; -+ if (leafs <= rcap) { -+ /* all pointers to leafs fit to the root */ -+ return leafs; -+ } -+ -+ /* ok. 
we need separate index block(s) to link all leaf blocks */ -+ idxs = (leafs + icap - 1) / icap; -+ do { -+ num += idxs; -+ idxs = (idxs + icap - 1) / icap; -+ } while (idxs > rcap); -+ -+ return num; -+} -+ -+/* -+ * called at mount time -+ */ -+void ext3_ext_init(struct super_block *sb) -+{ -+ /* -+ * possible initialization would be here -+ */ -+ -+ if (test_opt(sb, EXTENTS)) { -+ printk("EXT3-fs: file extents enabled"); -+#ifdef AGRESSIVE_TEST -+ printk(", agressive tests"); -+#endif -+#ifdef CHECK_BINSEARCH -+ printk(", check binsearch"); -+#endif -+ printk("\n"); -+ } -+} -+ -+/* -+ * called at umount time -+ */ -+void ext3_ext_release(struct super_block *sb) -+{ -+} -+ -+/************************************************************************ -+ * VFS related routines -+ ************************************************************************/ -+ -+static int ext3_get_inode_write_access(handle_t *handle, void *buffer) -+{ -+ /* we use in-core data, not bh */ -+ return 0; -+} -+ -+static int ext3_mark_buffer_dirty(handle_t *handle, void *buffer) -+{ -+ struct inode *inode = buffer; -+ return ext3_mark_inode_dirty(handle, inode); -+} -+ -+static int ext3_ext_mergable(struct ext3_extent *ex1, -+ struct ext3_extent *ex2) -+{ -+ /* FIXME: support for large fs */ -+ if (ex1->ee_start + ex1->ee_len == ex2->ee_start) -+ return 1; -+ return 0; -+} -+ -+static int -+ext3_remove_blocks_credits(struct ext3_extents_tree *tree, -+ struct ext3_extent *ex, -+ unsigned long from, unsigned long to) -+{ -+ int needed; -+ -+ /* at present, extent can't cross block group */; -+ needed = 4; /* bitmap + group desc + sb + inode */ -+ -+#ifdef CONFIG_QUOTA -+ needed += 2 * EXT3_SINGLEDATA_TRANS_BLOCKS; -+#endif -+ return needed; -+} -+ -+static int -+ext3_remove_blocks(struct ext3_extents_tree *tree, -+ struct ext3_extent *ex, -+ unsigned long from, unsigned long to) -+{ -+ int needed = ext3_remove_blocks_credits(tree, ex, from, to); -+ handle_t *handle = 
ext3_journal_start(tree->inode, needed); -+ struct buffer_head *bh; -+ int i; -+ -+ if (IS_ERR(handle)) -+ return PTR_ERR(handle); -+ if (from >= ex->ee_block && to == ex->ee_block + ex->ee_len - 1) { -+ /* tail removal */ -+ unsigned long num, start; -+ num = ex->ee_block + ex->ee_len - from; -+ start = ex->ee_start + ex->ee_len - num; -+ ext_debug(tree, "free last %lu blocks starting %lu\n", -+ num, start); -+ for (i = 0; i < num; i++) { -+ bh = sb_find_get_block(tree->inode->i_sb, start + i); -+ ext3_forget(handle, 0, tree->inode, bh, start + i); -+ } -+ ext3_free_blocks(handle, tree->inode, start, num); -+ } else if (from == ex->ee_block && to <= ex->ee_block + ex->ee_len - 1) { -+ printk("strange request: removal %lu-%lu from %u:%u\n", -+ from, to, ex->ee_block, ex->ee_len); -+ } else { -+ printk("strange request: removal(2) %lu-%lu from %u:%u\n", -+ from, to, ex->ee_block, ex->ee_len); -+ } -+ ext3_journal_stop(handle); -+ return 0; -+} -+ -+static int ext3_ext_find_goal(struct inode *inode, -+ struct ext3_ext_path *path, unsigned long block) -+{ -+ struct ext3_inode_info *ei = EXT3_I(inode); -+ unsigned long bg_start; -+ unsigned long colour; -+ int depth; -+ -+ if (path) { -+ struct ext3_extent *ex; -+ depth = path->p_depth; -+ -+ /* try to predict block placement */ -+ if ((ex = path[depth].p_ext)) -+ return ex->ee_start + (block - ex->ee_block); -+ -+ /* it looks index is empty -+ * try to find starting from index itself */ -+ if (path[depth].p_bh) -+ return path[depth].p_bh->b_blocknr; -+ } -+ -+ /* OK. 
use inode's group */ -+ bg_start = (ei->i_block_group * EXT3_BLOCKS_PER_GROUP(inode->i_sb)) + -+ le32_to_cpu(EXT3_SB(inode->i_sb)->s_es->s_first_data_block); -+ colour = (current->pid % 16) * -+ (EXT3_BLOCKS_PER_GROUP(inode->i_sb) / 16); -+ return bg_start + colour + block; -+} -+ -+static int ext3_new_block_cb(handle_t *handle, struct ext3_extents_tree *tree, -+ struct ext3_ext_path *path, -+ struct ext3_extent *ex, int *err) -+{ -+ struct inode *inode = tree->inode; -+ int newblock, goal; -+ -+ EXT_ASSERT(path); -+ EXT_ASSERT(ex); -+ EXT_ASSERT(ex->ee_start); -+ EXT_ASSERT(ex->ee_len); -+ -+ /* reuse block from the extent to order data/metadata */ -+ newblock = ex->ee_start++; -+ ex->ee_len--; -+ if (ex->ee_len == 0) { -+ ex->ee_len = 1; -+ /* allocate new block for the extent */ -+ goal = ext3_ext_find_goal(inode, path, ex->ee_block); -+ ex->ee_start = ext3_new_block(handle, inode, goal, 0, 0, err); -+ if (ex->ee_start == 0) { -+ /* error occured: restore old extent */ -+ ex->ee_start = newblock; -+ return 0; -+ } -+ } -+ return newblock; -+} -+ -+static struct ext3_extents_helpers ext3_blockmap_helpers = { -+ .get_write_access = ext3_get_inode_write_access, -+ .mark_buffer_dirty = ext3_mark_buffer_dirty, -+ .mergable = ext3_ext_mergable, -+ .new_block = ext3_new_block_cb, -+ .remove_extent = ext3_remove_blocks, -+ .remove_extent_credits = ext3_remove_blocks_credits, -+}; -+ -+void ext3_init_tree_desc(struct ext3_extents_tree *tree, -+ struct inode *inode) -+{ -+ tree->inode = inode; -+ tree->root = (void *) EXT3_I(inode)->i_data; -+ tree->buffer = (void *) inode; -+ tree->buffer_len = sizeof(EXT3_I(inode)->i_data); -+ tree->cex = (struct ext3_extent *) &EXT3_I(inode)->i_cached_extent; -+ tree->ops = &ext3_blockmap_helpers; -+} -+ -+int ext3_ext_get_block(handle_t *handle, struct inode *inode, -+ long iblock, struct buffer_head *bh_result, -+ int create, int extend_disksize) -+{ -+ struct ext3_ext_path *path = NULL; -+ struct ext3_extent newex; -+ struct 
ext3_extent *ex; -+ int goal, newblock, err = 0, depth; -+ struct ext3_extents_tree tree; -+ -+ clear_buffer_new(bh_result); -+ ext3_init_tree_desc(&tree, inode); -+ ext_debug(&tree, "block %d requested for inode %u\n", -+ (int) iblock, (unsigned) inode->i_ino); -+ down(&EXT3_I(inode)->truncate_sem); -+ -+ /* check in cache */ -+ if (ext3_ext_in_cache(&tree, iblock, &newex)) { -+ if (newex.ee_start == EXT_CACHE_MARK) { -+ /* this is cached gap */ -+ if (!create) { -+ /* block isn't allocated yet and -+ * user don't want to allocate it */ -+ goto out2; -+ } -+ /* we should allocate requested block */ -+ } else if (newex.ee_start) { -+ /* block is already allocated */ -+ newblock = iblock - newex.ee_block + newex.ee_start; -+ goto out; -+ } -+ } -+ -+ /* find extent for this block */ -+ path = ext3_ext_find_extent(&tree, iblock, NULL); -+ if (IS_ERR(path)) { -+ err = PTR_ERR(path); -+ path = NULL; -+ goto out2; -+ } -+ -+ depth = EXT_DEPTH(&tree); -+ -+ /* -+ * consistent leaf must not be empty -+ * this situations is possible, though, _during_ tree modification -+ * this is why assert can't be put in ext3_ext_find_extent() -+ */ -+ EXT_ASSERT(path[depth].p_ext != NULL || depth == 0); -+ -+ if ((ex = path[depth].p_ext)) { -+ /* if found exent covers block, simple return it */ -+ if (iblock >= ex->ee_block && iblock < ex->ee_block + ex->ee_len) { -+ newblock = iblock - ex->ee_block + ex->ee_start; -+ ext_debug(&tree, "%d fit into %d:%d -> %d\n", -+ (int) iblock, ex->ee_block, ex->ee_len, -+ newblock); -+ ext3_ext_put_in_cache(&tree, ex); -+ goto out; -+ } -+ } -+ -+ /* -+ * requested block isn't allocated yet -+ * we couldn't try to create block if create flag is zero -+ */ -+ if (!create) { -+ /* put just found gap into cache to speedup subsequest reqs */ -+ ext3_ext_put_gap_in_cache(&tree, path, iblock); -+ goto out2; -+ } -+ -+ /* allocate new block */ -+ goal = ext3_ext_find_goal(inode, path, iblock); -+ newblock = ext3_new_block(handle, inode, goal, 0, 0, &err); 
-+ if (!newblock) -+ goto out2; -+ ext_debug(&tree, "allocate new block: goal %d, found %d\n", -+ goal, newblock); -+ -+ /* try to insert new extent into found leaf and return */ -+ newex.ee_block = iblock; -+ newex.ee_start = newblock; -+ newex.ee_len = 1; -+ err = ext3_ext_insert_extent(handle, &tree, path, &newex); -+ if (err) -+ goto out2; -+ -+ if (extend_disksize && inode->i_size > EXT3_I(inode)->i_disksize) -+ EXT3_I(inode)->i_disksize = inode->i_size; -+ -+ /* previous routine could use block we allocated */ -+ newblock = newex.ee_start; -+ set_buffer_new(bh_result); -+ -+ ext3_ext_put_in_cache(&tree, &newex); -+out: -+ ext3_ext_show_leaf(&tree, path); -+ map_bh(bh_result, inode->i_sb, newblock); -+out2: -+ if (path) { -+ ext3_ext_drop_refs(path); -+ kfree(path); -+ } -+ up(&EXT3_I(inode)->truncate_sem); -+ -+ return err; -+} -+ -+void ext3_ext_truncate(struct inode * inode, struct page *page) -+{ -+ struct address_space *mapping = inode->i_mapping; -+ struct super_block *sb = inode->i_sb; -+ struct ext3_extents_tree tree; -+ unsigned long last_block; -+ handle_t *handle; -+ int err = 0; -+ -+ ext3_init_tree_desc(&tree, inode); -+ -+ /* -+ * probably first extent we're gonna free will be last in block -+ */ -+ err = ext3_writepage_trans_blocks(inode) + 3; -+ handle = ext3_journal_start(inode, err); -+ if (IS_ERR(handle)) { -+ if (page) { -+ clear_highpage(page); -+ flush_dcache_page(page); -+ unlock_page(page); -+ page_cache_release(page); -+ } -+ return; -+ } -+ -+ if (page) -+ ext3_block_truncate_page(handle, page, mapping, inode->i_size); -+ -+ down(&EXT3_I(inode)->truncate_sem); -+ ext3_ext_invalidate_cache(&tree); -+ -+ /* -+ * TODO: optimization is possible here -+ * probably we need not scaning at all, -+ * because page truncation is enough -+ */ -+ if (ext3_orphan_add(handle, inode)) -+ goto out_stop; -+ -+ /* we have to know where to truncate from in crash case */ -+ EXT3_I(inode)->i_disksize = inode->i_size; -+ ext3_mark_inode_dirty(handle, 
inode); -+ -+ last_block = (inode->i_size + sb->s_blocksize - 1) -+ >> EXT3_BLOCK_SIZE_BITS(sb); -+ err = ext3_ext_remove_space(&tree, last_block, EXT_MAX_BLOCK); -+ -+ /* In a multi-transaction truncate, we only make the final -+ * transaction synchronous */ -+ if (IS_SYNC(inode)) -+ handle->h_sync = 1; -+ -+out_stop: -+ /* -+ * If this was a simple ftruncate(), and the file will remain alive -+ * then we need to clear up the orphan record which we created above. -+ * However, if this was a real unlink then we were called by -+ * ext3_delete_inode(), and we allow that function to clean up the -+ * orphan info for us. -+ */ -+ if (inode->i_nlink) -+ ext3_orphan_del(handle, inode); -+ -+ up(&EXT3_I(inode)->truncate_sem); -+ ext3_journal_stop(handle); -+} -+ -+/* -+ * this routine calculate max number of blocks we could modify -+ * in order to allocate new block for an inode -+ */ -+int ext3_ext_writepage_trans_blocks(struct inode *inode, int num) -+{ -+ struct ext3_extents_tree tree; -+ int needed; -+ -+ ext3_init_tree_desc(&tree, inode); -+ -+ needed = ext3_ext_calc_credits_for_insert(&tree, NULL); -+ -+ /* caller want to allocate num blocks */ -+ needed *= num; -+ -+#ifdef CONFIG_QUOTA -+ /* -+ * FIXME: real calculation should be here -+ * it depends on blockmap format of qouta file -+ */ -+ needed += 2 * EXT3_SINGLEDATA_TRANS_BLOCKS; -+#endif -+ -+ return needed; -+} -+ -+void ext3_extents_initialize_blockmap(handle_t *handle, struct inode *inode) -+{ -+ struct ext3_extents_tree tree; -+ -+ ext3_init_tree_desc(&tree, inode); -+ ext3_extent_tree_init(handle, &tree); -+} -+ -+int ext3_ext_calc_blockmap_metadata(struct inode *inode, int blocks) -+{ -+ struct ext3_extents_tree tree; -+ -+ ext3_init_tree_desc(&tree, inode); -+ return ext3_ext_calc_metadata_amount(&tree, blocks); -+} -+ -+static int -+ext3_ext_store_extent_cb(struct ext3_extents_tree *tree, -+ struct ext3_ext_path *path, -+ struct ext3_extent *newex, int exist) -+{ -+ struct ext3_extent_buf *buf = 
(struct ext3_extent_buf *) tree->private; -+ -+ if (!exist) -+ return EXT_CONTINUE; -+ if (buf->err < 0) -+ return EXT_BREAK; -+ if (buf->cur - buf->buffer + sizeof(*newex) > buf->buflen) -+ return EXT_BREAK; -+ -+ if (!copy_to_user(buf->cur, newex, sizeof(*newex))) { -+ buf->err++; -+ buf->cur += sizeof(*newex); -+ } else { -+ buf->err = -EFAULT; -+ return EXT_BREAK; -+ } -+ return EXT_CONTINUE; -+} -+ -+static int -+ext3_ext_collect_stats_cb(struct ext3_extents_tree *tree, -+ struct ext3_ext_path *path, -+ struct ext3_extent *ex, int exist) -+{ -+ struct ext3_extent_tree_stats *buf = -+ (struct ext3_extent_tree_stats *) tree->private; -+ int depth; -+ -+ if (!exist) -+ return EXT_CONTINUE; -+ -+ depth = EXT_DEPTH(tree); -+ buf->extents_num++; -+ if (path[depth].p_ext == EXT_FIRST_EXTENT(path[depth].p_hdr)) -+ buf->leaf_num++; -+ return EXT_CONTINUE; -+} -+ -+int ext3_ext_ioctl(struct inode *inode, struct file *filp, unsigned int cmd, -+ unsigned long arg) -+{ -+ int err = 0; -+ -+ if (!(EXT3_I(inode)->i_flags & EXT3_EXTENTS_FL)) -+ return -EINVAL; -+ -+ if (cmd == EXT3_IOC_GET_EXTENTS) { -+ struct ext3_extent_buf buf; -+ struct ext3_extents_tree tree; -+ -+ if (copy_from_user(&buf, (void *) arg, sizeof(buf))) -+ return -EFAULT; -+ -+ ext3_init_tree_desc(&tree, inode); -+ buf.cur = buf.buffer; -+ buf.err = 0; -+ tree.private = &buf; -+ down(&EXT3_I(inode)->truncate_sem); -+ err = ext3_ext_walk_space(&tree, buf.start, EXT_MAX_BLOCK, -+ ext3_ext_store_extent_cb); -+ up(&EXT3_I(inode)->truncate_sem); -+ if (err == 0) -+ err = buf.err; -+ } else if (cmd == EXT3_IOC_GET_TREE_STATS) { -+ struct ext3_extent_tree_stats buf; -+ struct ext3_extents_tree tree; -+ -+ ext3_init_tree_desc(&tree, inode); -+ down(&EXT3_I(inode)->truncate_sem); -+ buf.depth = EXT_DEPTH(&tree); -+ buf.extents_num = 0; -+ buf.leaf_num = 0; -+ tree.private = &buf; -+ err = ext3_ext_walk_space(&tree, 0, EXT_MAX_BLOCK, -+ ext3_ext_collect_stats_cb); -+ up(&EXT3_I(inode)->truncate_sem); -+ if (!err) -+ 
err = copy_to_user((void *) arg, &buf, sizeof(buf)); -+ } else if (cmd == EXT3_IOC_GET_TREE_DEPTH) { -+ struct ext3_extents_tree tree; -+ ext3_init_tree_desc(&tree, inode); -+ down(&EXT3_I(inode)->truncate_sem); -+ err = EXT_DEPTH(&tree); -+ up(&EXT3_I(inode)->truncate_sem); -+ } -+ -+ return err; -+} -+ -+EXPORT_SYMBOL(ext3_init_tree_desc); -+EXPORT_SYMBOL(ext3_mark_inode_dirty); -+EXPORT_SYMBOL(ext3_ext_invalidate_cache); -+EXPORT_SYMBOL(ext3_ext_insert_extent); -+EXPORT_SYMBOL(ext3_ext_walk_space); -+EXPORT_SYMBOL(ext3_ext_find_goal); -+EXPORT_SYMBOL(ext3_ext_calc_credits_for_insert); -+ -Index: linux-2.6.7/fs/ext3/ialloc.c -=================================================================== ---- linux-2.6.7.orig/fs/ext3/ialloc.c 2004-09-12 20:07:29.000000000 +0400 -+++ linux-2.6.7/fs/ext3/ialloc.c 2004-09-12 20:09:25.000000000 +0400 -@@ -646,6 +646,17 @@ - DQUOT_FREE_INODE(inode); - goto fail2; - } -+ if (test_opt(sb, EXTENTS)) { -+ EXT3_I(inode)->i_flags |= EXT3_EXTENTS_FL; -+ ext3_extents_initialize_blockmap(handle, inode); -+ if (!EXT3_HAS_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_EXTENTS)) { -+ err = ext3_journal_get_write_access(handle, EXT3_SB(sb)->s_sbh); -+ if (err) goto fail; -+ EXT3_SET_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_EXTENTS); -+ BUFFER_TRACE(EXT3_SB(sb)->s_sbh, "call ext3_journal_dirty_metadata"); -+ err = ext3_journal_dirty_metadata(handle, EXT3_SB(sb)->s_sbh); -+ } -+ } - err = ext3_mark_inode_dirty(handle, inode); - if (err) { - ext3_std_error(sb, err); -Index: linux-2.6.7/fs/ext3/inode.c -=================================================================== ---- linux-2.6.7.orig/fs/ext3/inode.c 2004-09-12 20:07:29.000000000 +0400 -+++ linux-2.6.7/fs/ext3/inode.c 2004-09-12 20:07:35.000000000 +0400 -@@ -857,6 +857,17 @@ - goto reread; - } - -+static inline int -+ext3_get_block_wrap(handle_t *handle, struct inode *inode, long block, -+ struct buffer_head *bh, int create, int extend_disksize) -+{ -+ if (EXT3_I(inode)->i_flags & 
EXT3_EXTENTS_FL) -+ return ext3_ext_get_block(handle, inode, block, bh, create, -+ extend_disksize); -+ return ext3_get_block_handle(handle, inode, block, bh, create, -+ extend_disksize); -+} -+ - static int ext3_get_block(struct inode *inode, sector_t iblock, - struct buffer_head *bh_result, int create) - { -@@ -867,8 +878,8 @@ - handle = ext3_journal_current_handle(); - J_ASSERT(handle != 0); - } -- ret = ext3_get_block_handle(handle, inode, iblock, -- bh_result, create, 1); -+ ret = ext3_get_block_wrap(handle, inode, iblock, -+ bh_result, create, 1); - return ret; - } - -@@ -894,8 +905,8 @@ - } - } - if (ret == 0) -- ret = ext3_get_block_handle(handle, inode, iblock, -- bh_result, create, 0); -+ ret = ext3_get_block_wrap(handle, inode, iblock, -+ bh_result, create, 0); - if (ret == 0) - bh_result->b_size = (1 << inode->i_blkbits); - return ret; -@@ -916,7 +927,7 @@ - dummy.b_state = 0; - dummy.b_blocknr = -1000; - buffer_trace_init(&dummy.b_history); -- *errp = ext3_get_block_handle(handle, inode, block, &dummy, create, 1); -+ *errp = ext3_get_block_wrap(handle, inode, block, &dummy, create, 1); - if (!*errp && buffer_mapped(&dummy)) { - struct buffer_head *bh; - bh = sb_getblk(inode->i_sb, dummy.b_blocknr); -@@ -1669,7 +1680,7 @@ - * This required during truncate. We need to physically zero the tail end - * of that block so it doesn't yield old data if the file is later grown. - */ --static int ext3_block_truncate_page(handle_t *handle, struct page *page, -+int ext3_block_truncate_page(handle_t *handle, struct page *page, - struct address_space *mapping, loff_t from) - { - unsigned long index = from >> PAGE_CACHE_SHIFT; -@@ -2165,6 +2176,9 @@ - return; - } - -+ if (EXT3_I(inode)->i_flags & EXT3_EXTENTS_FL) -+ return ext3_ext_truncate(inode, page); -+ - handle = start_transaction(inode); - if (IS_ERR(handle)) { - if (page) { -@@ -2888,6 +2902,9 @@ - int indirects = (EXT3_NDIR_BLOCKS % bpp) ? 
5 : 3; - int ret; - -+ if (EXT3_I(inode)->i_flags & EXT3_EXTENTS_FL) -+ return ext3_ext_writepage_trans_blocks(inode, bpp); -+ - if (ext3_should_journal_data(inode)) - ret = 3 * (bpp + indirects) + 2; - else -Index: linux-2.6.7/fs/ext3/Makefile -=================================================================== ---- linux-2.6.7.orig/fs/ext3/Makefile 2004-09-12 20:07:20.000000000 +0400 -+++ linux-2.6.7/fs/ext3/Makefile 2004-09-12 20:07:35.000000000 +0400 -@@ -5,7 +5,7 @@ - obj-$(CONFIG_EXT3_FS) += ext3.o - - ext3-y := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o iopen.o \ -- ioctl.o namei.o super.o symlink.o hash.o -+ ioctl.o namei.o super.o symlink.o hash.o extents.o - - ext3-$(CONFIG_EXT3_FS_XATTR) += xattr.o xattr_user.o xattr_trusted.o - ext3-$(CONFIG_EXT3_FS_POSIX_ACL) += acl.o -Index: linux-2.6.7/fs/ext3/super.c -=================================================================== ---- linux-2.6.7.orig/fs/ext3/super.c 2004-09-12 20:07:31.000000000 +0400 -+++ linux-2.6.7/fs/ext3/super.c 2004-09-12 20:07:35.000000000 +0400 -@@ -392,6 +392,7 @@ - struct ext3_super_block *es = sbi->s_es; - int i; - -+ ext3_ext_release(sb); - ext3_xattr_put_super(sb); - journal_destroy(sbi->s_journal); - if (!(sb->s_flags & MS_RDONLY)) { -@@ -455,6 +456,9 @@ - ei->i_default_acl = EXT3_ACL_NOT_CACHED; - #endif - ei->vfs_inode.i_version = 1; -+ ei->i_cached_extent[0] = 0; -+ ei->i_cached_extent[1] = 0; -+ ei->i_cached_extent[2] = 0; - return &ei->vfs_inode; - } - -@@ -590,7 +594,7 @@ - Opt_usrjquota, Opt_grpjquota, Opt_offusrjquota, Opt_offgrpjquota, - Opt_jqfmt_vfsold, Opt_jqfmt_vfsv0, - Opt_iopen, Opt_noiopen, Opt_iopen_nopriv, -- Opt_ignore, Opt_err, -+ Opt_ignore, Opt_err, Opt_extents, Opt_extdebug - }; - - static match_table_t tokens = { -@@ -638,6 +642,8 @@ - {Opt_iopen, "iopen"}, - {Opt_noiopen, "noiopen"}, - {Opt_iopen_nopriv, "iopen_nopriv"}, -+ {Opt_extents, "extents"}, -+ {Opt_extdebug, "extdebug"}, - {Opt_err, NULL} - }; - -@@ -917,6 +923,12 @@ - break; - case 
Opt_ignore: - break; -+ case Opt_extents: -+ set_opt (sbi->s_mount_opt, EXTENTS); -+ break; -+ case Opt_extdebug: -+ set_opt (sbi->s_mount_opt, EXTDEBUG); -+ break; - default: - printk (KERN_ERR - "EXT3-fs: Unrecognized mount option \"%s\" " -@@ -1589,6 +1601,8 @@ - percpu_counter_mod(&sbi->s_dirs_counter, - ext3_count_dirs(sb)); - -+ ext3_ext_init(sb); -+ - return 0; - - failed_mount3: -Index: linux-2.6.7/fs/ext3/ioctl.c -=================================================================== ---- linux-2.6.7.orig/fs/ext3/ioctl.c 2004-09-12 20:07:16.000000000 +0400 -+++ linux-2.6.7/fs/ext3/ioctl.c 2004-09-12 20:07:35.000000000 +0400 -@@ -176,6 +176,10 @@ - return ret; - } - #endif -+ case EXT3_IOC_GET_EXTENTS: -+ case EXT3_IOC_GET_TREE_STATS: -+ case EXT3_IOC_GET_TREE_DEPTH: -+ return ext3_ext_ioctl(inode, filp, cmd, arg); - default: - return -ENOTTY; - } -Index: linux-2.6.7/include/linux/ext3_fs.h -=================================================================== ---- linux-2.6.7.orig/include/linux/ext3_fs.h 2004-09-12 20:07:33.000000000 +0400 -+++ linux-2.6.7/include/linux/ext3_fs.h 2004-09-12 20:10:02.000000000 +0400 -@@ -186,6 +186,7 @@ - #define EXT3_DIRSYNC_FL 0x00010000 /* dirsync behaviour (directories only) */ - #define EXT3_TOPDIR_FL 0x00020000 /* Top of directory hierarchies*/ - #define EXT3_RESERVED_FL 0x80000000 /* reserved for ext3 lib */ -+#define EXT3_EXTENTS_FL 0x00080000 /* Inode uses extents */ - - #define EXT3_FL_USER_VISIBLE 0x0003DFFF /* User visible flags */ - #define EXT3_FL_USER_MODIFIABLE 0x000380FF /* User modifiable flags */ -@@ -209,6 +210,9 @@ - #ifdef CONFIG_JBD_DEBUG - #define EXT3_IOC_WAIT_FOR_READONLY _IOR('f', 99, long) - #endif -+#define EXT3_IOC_GET_EXTENTS _IOR('f', 5, long) -+#define EXT3_IOC_GET_TREE_DEPTH _IOR('f', 6, long) -+#define EXT3_IOC_GET_TREE_STATS _IOR('f', 7, long) - - /* - * Structure of an inode on the disk -@@ -329,6 +333,8 @@ - #define EXT3_MOUNT_POSIX_ACL 0x8000 /* POSIX Access Control Lists */ - #define 
EXT3_MOUNT_IOPEN 0x40000 /* Allow access via iopen */ - #define EXT3_MOUNT_IOPEN_NOPRIV 0x80000 /* Make iopen world-readable */ -+#define EXT3_MOUNT_EXTENTS 0x100000/* Extents support */ -+#define EXT3_MOUNT_EXTDEBUG 0x200000/* Extents debug */ - - /* Compatibility, for having both ext2_fs.h and ext3_fs.h included at once */ - #ifndef clear_opt -@@ -517,11 +523,13 @@ - #define EXT3_FEATURE_INCOMPAT_RECOVER 0x0004 /* Needs recovery */ - #define EXT3_FEATURE_INCOMPAT_JOURNAL_DEV 0x0008 /* Journal device */ - #define EXT3_FEATURE_INCOMPAT_META_BG 0x0010 -+#define EXT3_FEATURE_INCOMPAT_EXTENTS 0x0040 /* extents support */ - - #define EXT3_FEATURE_COMPAT_SUPP EXT2_FEATURE_COMPAT_EXT_ATTR - #define EXT3_FEATURE_INCOMPAT_SUPP (EXT3_FEATURE_INCOMPAT_FILETYPE| \ - EXT3_FEATURE_INCOMPAT_RECOVER| \ -- EXT3_FEATURE_INCOMPAT_META_BG) -+ EXT3_FEATURE_INCOMPAT_META_BG| \ -+ EXT3_FEATURE_INCOMPAT_EXTENTS) - #define EXT3_FEATURE_RO_COMPAT_SUPP (EXT3_FEATURE_RO_COMPAT_SPARSE_SUPER| \ - EXT3_FEATURE_RO_COMPAT_LARGE_FILE| \ - EXT3_FEATURE_RO_COMPAT_BTREE_DIR) -@@ -725,6 +733,7 @@ - - - /* inode.c */ -+extern int ext3_block_truncate_page(handle_t *, struct page *, struct address_space *, loff_t); - extern int ext3_forget(handle_t *, int, struct inode *, struct buffer_head *, int); - extern struct buffer_head * ext3_getblk (handle_t *, struct inode *, long, int, int *); - extern struct buffer_head * ext3_bread (handle_t *, struct inode *, int, int, int *); -@@ -798,6 +807,14 @@ - extern struct inode_operations ext3_symlink_inode_operations; - extern struct inode_operations ext3_fast_symlink_inode_operations; - -+/* extents.c */ -+extern int ext3_ext_writepage_trans_blocks(struct inode *, int); -+extern int ext3_ext_get_block(handle_t *, struct inode *, long, -+ struct buffer_head *, int, int); -+extern void ext3_ext_truncate(struct inode *, struct page *); -+extern void ext3_ext_init(struct super_block *); -+extern void ext3_ext_release(struct super_block *); -+extern void 
ext3_extents_initialize_blockmap(handle_t *, struct inode *); - - #endif /* __KERNEL__ */ - -Index: linux-2.6.7/include/linux/ext3_extents.h -=================================================================== ---- linux-2.6.7.orig/include/linux/ext3_extents.h 2003-01-30 13:24:37.000000000 +0300 -+++ linux-2.6.7/include/linux/ext3_extents.h 2004-09-12 20:07:35.000000000 +0400 -@@ -0,0 +1,238 @@ -+/* -+ * Copyright (c) 2003, Cluster File Systems, Inc, info@clusterfs.com -+ * Written by Alex Tomas -+ * -+ * This program is free software; you can redistribute it and/or modify -+ * it under the terms of the GNU General Public License version 2 as -+ * published by the Free Software Foundation. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU General Public License for more details. -+ * -+ * You should have received a copy of the GNU General Public Licens -+ * along with this program; if not, write to the Free Software -+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111- -+ */ -+ -+#ifndef _LINUX_EXT3_EXTENTS -+#define _LINUX_EXT3_EXTENTS -+ -+/* -+ * with AGRESSIVE_TEST defined capacity of index/leaf blocks -+ * become very little, so index split, in-depth growing and -+ * other hard changes happens much more often -+ * this is for debug purposes only -+ */ -+#define AGRESSIVE_TEST_ -+ -+/* -+ * if CHECK_BINSEARCH defined, then results of binary search -+ * will be checked by linear search -+ */ -+#define CHECK_BINSEARCH_ -+ -+/* -+ * if EXT_DEBUG is defined you can use 'extdebug' mount option -+ * to get lots of info what's going on -+ */ -+#define EXT_DEBUG -+#ifdef EXT_DEBUG -+#define ext_debug(tree,fmt,a...) \ -+do { \ -+ if (test_opt((tree)->inode->i_sb, EXTDEBUG)) \ -+ printk(fmt, ##a); \ -+} while (0); -+#else -+#define ext_debug(tree,fmt,a...) 
-+#endif -+ -+/* -+ * if EXT_STATS is defined then stats numbers are collected -+ * these number will be displayed at umount time -+ */ -+#define EXT_STATS_ -+ -+ -+#define EXT3_ALLOC_NEEDED 3 /* block bitmap + group desc. + sb */ -+ -+/* -+ * ext3_inode has i_block array (total 60 bytes) -+ * first 4 bytes are used to store: -+ * - tree depth (0 mean there is no tree yet. all extents in the inode) -+ * - number of alive extents in the inode -+ */ -+ -+/* -+ * this is extent on-disk structure -+ * it's used at the bottom of the tree -+ */ -+struct ext3_extent { -+ __u32 ee_block; /* first logical block extent covers */ -+ __u16 ee_len; /* number of blocks covered by extent */ -+ __u16 ee_start_hi; /* high 16 bits of physical block */ -+ __u32 ee_start; /* low 32 bigs of physical block */ -+}; -+ -+/* -+ * this is index on-disk structure -+ * it's used at all the levels, but the bottom -+ */ -+struct ext3_extent_idx { -+ __u32 ei_block; /* index covers logical blocks from 'block' */ -+ __u32 ei_leaf; /* pointer to the physical block of the next * -+ * level. leaf or next index could bet here */ -+ __u16 ei_leaf_hi; /* high 16 bits of physical block */ -+ __u16 ei_unused; -+}; -+ -+/* -+ * each block (leaves and indexes), even inode-stored has header -+ */ -+struct ext3_extent_header { -+ __u16 eh_magic; /* probably will support different formats */ -+ __u16 eh_entries; /* number of valid entries */ -+ __u16 eh_max; /* capacity of store in entries */ -+ __u16 eh_depth; /* has tree real underlaying blocks? 
*/ -+ __u32 eh_generation; /* generation of the tree */ -+}; -+ -+#define EXT3_EXT_MAGIC 0xf30a -+ -+/* -+ * array of ext3_ext_path contains path to some extent -+ * creation/lookup routines use it for traversal/splitting/etc -+ * truncate uses it to simulate recursive walking -+ */ -+struct ext3_ext_path { -+ __u32 p_block; -+ __u16 p_depth; -+ struct ext3_extent *p_ext; -+ struct ext3_extent_idx *p_idx; -+ struct ext3_extent_header *p_hdr; -+ struct buffer_head *p_bh; -+}; -+ -+/* -+ * structure for external API -+ */ -+ -+/* -+ * ext3_extents_tree is used to pass initial information -+ * to top-level extents API -+ */ -+struct ext3_extents_helpers; -+struct ext3_extents_tree { -+ struct inode *inode; /* inode which tree belongs to */ -+ void *root; /* ptr to data top of tree resides at */ -+ void *buffer; /* will be passed as arg to ^^ routines */ -+ int buffer_len; -+ void *private; -+ struct ext3_extent *cex;/* last found extent */ -+ struct ext3_extents_helpers *ops; -+}; -+ -+struct ext3_extents_helpers { -+ int (*get_write_access)(handle_t *h, void *buffer); -+ int (*mark_buffer_dirty)(handle_t *h, void *buffer); -+ int (*mergable)(struct ext3_extent *ex1, struct ext3_extent *ex2); -+ int (*remove_extent_credits)(struct ext3_extents_tree *, -+ struct ext3_extent *, unsigned long, -+ unsigned long); -+ int (*remove_extent)(struct ext3_extents_tree *, -+ struct ext3_extent *, unsigned long, -+ unsigned long); -+ int (*new_block)(handle_t *, struct ext3_extents_tree *, -+ struct ext3_ext_path *, struct ext3_extent *, -+ int *); -+}; -+ -+/* -+ * to be called by ext3_ext_walk_space() -+ * negative retcode - error -+ * positive retcode - signal for ext3_ext_walk_space(), see below -+ * callback must return valid extent (passed or newly created) -+ */ -+typedef int (*ext_prepare_callback)(struct ext3_extents_tree *, -+ struct ext3_ext_path *, -+ struct ext3_extent *, int); -+ -+#define EXT_CONTINUE 0 -+#define EXT_BREAK 1 -+#define EXT_REPEAT 2 -+ -+ -+#define 
EXT_MAX_BLOCK 0xffffffff -+#define EXT_CACHE_MARK 0xffff -+ -+ -+#define EXT_FIRST_EXTENT(__hdr__) \ -+ ((struct ext3_extent *) (((char *) (__hdr__)) + \ -+ sizeof(struct ext3_extent_header))) -+#define EXT_FIRST_INDEX(__hdr__) \ -+ ((struct ext3_extent_idx *) (((char *) (__hdr__)) + \ -+ sizeof(struct ext3_extent_header))) -+#define EXT_HAS_FREE_INDEX(__path__) \ -+ ((__path__)->p_hdr->eh_entries < (__path__)->p_hdr->eh_max) -+#define EXT_LAST_EXTENT(__hdr__) \ -+ (EXT_FIRST_EXTENT((__hdr__)) + (__hdr__)->eh_entries - 1) -+#define EXT_LAST_INDEX(__hdr__) \ -+ (EXT_FIRST_INDEX((__hdr__)) + (__hdr__)->eh_entries - 1) -+#define EXT_MAX_EXTENT(__hdr__) \ -+ (EXT_FIRST_EXTENT((__hdr__)) + (__hdr__)->eh_max - 1) -+#define EXT_MAX_INDEX(__hdr__) \ -+ (EXT_FIRST_INDEX((__hdr__)) + (__hdr__)->eh_max - 1) -+ -+#define EXT_ROOT_HDR(tree) \ -+ ((struct ext3_extent_header *) (tree)->root) -+#define EXT_BLOCK_HDR(bh) \ -+ ((struct ext3_extent_header *) (bh)->b_data) -+#define EXT_DEPTH(_t_) \ -+ (((struct ext3_extent_header *)((_t_)->root))->eh_depth) -+#define EXT_GENERATION(_t_) \ -+ (((struct ext3_extent_header *)((_t_)->root))->eh_generation) -+ -+ -+#define EXT_ASSERT(__x__) if (!(__x__)) BUG(); -+ -+ -+/* -+ * this structure is used to gather extents from the tree via ioctl -+ */ -+struct ext3_extent_buf { -+ unsigned long start; -+ int buflen; -+ void *buffer; -+ void *cur; -+ int err; -+}; -+ -+/* -+ * this structure is used to collect stats info about the tree -+ */ -+struct ext3_extent_tree_stats { -+ int depth; -+ int extents_num; -+ int leaf_num; -+}; -+ -+extern int ext3_extent_tree_init(handle_t *, struct ext3_extents_tree *); -+extern int ext3_ext_calc_credits_for_insert(struct ext3_extents_tree *, struct ext3_ext_path *); -+extern int ext3_ext_insert_extent(handle_t *, struct ext3_extents_tree *, struct ext3_ext_path *, struct ext3_extent *); -+extern int ext3_ext_walk_space(struct ext3_extents_tree *, unsigned long, unsigned long, ext_prepare_callback); 
-+extern int ext3_ext_remove_space(struct ext3_extents_tree *, unsigned long, unsigned long); -+extern struct ext3_ext_path * ext3_ext_find_extent(struct ext3_extents_tree *, int, struct ext3_ext_path *); -+extern void ext3_init_tree_desc(struct ext3_extents_tree *, struct inode *); -+extern int ext3_ext_calc_blockmap_metadata(struct inode *, int); -+ -+static inline void -+ext3_ext_invalidate_cache(struct ext3_extents_tree *tree) -+{ -+ if (tree->cex) -+ tree->cex->ee_len = 0; -+} -+ -+ -+#endif /* _LINUX_EXT3_EXTENTS */ -+ -Index: linux-2.6.7/include/linux/ext3_fs_i.h -=================================================================== ---- linux-2.6.7.orig/include/linux/ext3_fs_i.h 2004-09-12 20:07:29.000000000 +0400 -+++ linux-2.6.7/include/linux/ext3_fs_i.h 2004-09-12 20:07:35.000000000 +0400 -@@ -111,6 +111,8 @@ - */ - struct semaphore truncate_sem; - struct inode vfs_inode; -+ -+ __u32 i_cached_extent[3]; - }; - - #endif /* _LINUX_EXT3_FS_I */ - -%diffstat - fs/ext3/Makefile | 2 - fs/ext3/extents.c | 2306 +++++++++++++++++++++++++++++++++++++++++++ - fs/ext3/ialloc.c | 11 - fs/ext3/inode.c | 29 - fs/ext3/ioctl.c | 4 - fs/ext3/super.c | 16 - include/linux/ext3_extents.h | 238 ++++ - include/linux/ext3_fs.h | 19 - include/linux/ext3_fs_i.h | 2 - 9 files changed, 2618 insertions(+), 9 deletions(-) - diff --git a/lustre/kernel_patches/patches/ext3-mballoc2-2.6-suse.patch b/lustre/kernel_patches/patches/ext3-mballoc2-2.6-suse.patch index 6517722..7c3d8bd 100644 --- a/lustre/kernel_patches/patches/ext3-mballoc2-2.6-suse.patch +++ b/lustre/kernel_patches/patches/ext3-mballoc2-2.6-suse.patch @@ -1650,9 +1650,9 @@ Index: linux-stage/include/linux/ext3_fs.h */ @@ -336,6 +338,7 @@ #define EXT3_MOUNT_IOPEN_NOPRIV 0x80000 /* Make iopen world-readable */ - #define EXT3_MOUNT_EXTENTS 0x10000 /* Extents support */ - #define EXT3_MOUNT_EXTDEBUG 0x20000 /* Extents debug */ -+#define EXT3_MOUNT_MBALLOC 0x100000/* Buddy allocation support */ + #define EXT3_MOUNT_EXTENTS 
0x100000/* Extents support */ + #define EXT3_MOUNT_EXTDEBUG 0x200000/* Extents debug */ ++#define EXT3_MOUNT_MBALLOC 0x400000/* Buddy allocation support */ /* Compatibility, for having both ext2_fs.h and ext3_fs.h included at once */ #ifndef clear_opt diff --git a/lustre/kernel_patches/patches/iopen-2.6-vanilla.patch b/lustre/kernel_patches/patches/iopen-2.6-vanilla.patch deleted file mode 100644 index 4b31f06..0000000 --- a/lustre/kernel_patches/patches/iopen-2.6-vanilla.patch +++ /dev/null @@ -1,476 +0,0 @@ - fs/ext3/inode.c | 3 - fs/ext3/iopen.c | 239 +++++++++++++++++++++++++++++++++++++ - fs/ext3/iopen.h | 15 ++ - fs/ext3/namei.c | 13 ++ - fs/ext3/super.c | 17 ++ - include/linux/ext3_fs.h | 2 - 7 files changed, 304 insertions(+), 1 deletion(-) - -Index: linux-stage/fs/ext3/Makefile -=================================================================== ---- linux-stage.orig/fs/ext3/Makefile 2004-06-22 21:12:15.000000000 -0400 -+++ linux-stage/fs/ext3/Makefile 2004-06-22 21:13:20.000000000 -0400 -@@ -4,7 +4,7 @@ - - obj-$(CONFIG_EXT3_FS) += ext3.o - --ext3-y := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o \ -+ext3-y := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o iopen.o \ - ioctl.o namei.o super.o symlink.o hash.o - - ext3-$(CONFIG_EXT3_FS_XATTR) += xattr.o xattr_user.o xattr_trusted.o -Index: linux-stage/fs/ext3/inode.c -=================================================================== ---- linux-stage.orig/fs/ext3/inode.c 2004-06-22 21:12:16.000000000 -0400 -+++ linux-stage/fs/ext3/inode.c 2004-06-22 21:13:20.000000000 -0400 -@@ -37,6 +37,7 @@ - #include - #include - #include "xattr.h" -+#include "iopen.h" - #include "acl.h" - - /* -@@ -2470,6 +2471,9 @@ - ei->i_acl = EXT3_ACL_NOT_CACHED; - ei->i_default_acl = EXT3_ACL_NOT_CACHED; - #endif -+ if (ext3_iopen_get_inode(inode)) -+ return; -+ - if (ext3_get_inode_loc(inode, &iloc, 0)) - goto bad_inode; - bh = iloc.bh; -Index: linux-stage/fs/ext3/iopen.c 
-=================================================================== ---- linux-stage.orig/fs/ext3/iopen.c 1969-12-31 19:00:00.000000000 -0500 -+++ linux-stage/fs/ext3/iopen.c 2004-06-22 21:13:20.000000000 -0400 -@@ -0,0 +1,274 @@ -+/* -+ * linux/fs/ext3/iopen.c -+ * -+ * Special support for open by inode number -+ * -+ * Copyright (C) 2001 by Theodore Ts'o (tytso@alum.mit.edu). -+ * -+ * This file may be redistributed under the terms of the GNU General -+ * Public License. -+ * -+ * -+ * Invariants: -+ * - there is only ever a single DCACHE_NFSD_DISCONNECTED dentry alias -+ * for an inode at one time. -+ * - there are never both connected and DCACHE_NFSD_DISCONNECTED dentry -+ * aliases on an inode at the same time. -+ * -+ * If we have any connected dentry aliases for an inode, use one of those -+ * in iopen_lookup(). Otherwise, we instantiate a single NFSD_DISCONNECTED -+ * dentry for this inode, which thereafter will be found by the dcache -+ * when looking up this inode number in __iopen__, so we don't return here -+ * until it is gone. -+ * -+ * If we get an inode via a regular name lookup, then we "rename" the -+ * NFSD_DISCONNECTED dentry to the proper name and parent. This ensures -+ * existing users of the disconnected dentry will continue to use the same -+ * dentry as the connected users, and there will never be both kinds of -+ * dentry aliases at one time. -+ */ -+ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include "iopen.h" -+ -+#ifndef assert -+#define assert(test) J_ASSERT(test) -+#endif -+ -+#define IOPEN_NAME_LEN 32 -+ -+/* -+ * This implements looking up an inode by number. 
-+ */ -+static struct dentry *iopen_lookup(struct inode * dir, struct dentry *dentry, -+ struct nameidata *nd) -+{ -+ struct inode *inode; -+ unsigned long ino; -+ struct list_head *lp; -+ struct dentry *alternate; -+ char buf[IOPEN_NAME_LEN]; -+ -+ if (dentry->d_name.len >= IOPEN_NAME_LEN) -+ return ERR_PTR(-ENAMETOOLONG); -+ -+ memcpy(buf, dentry->d_name.name, dentry->d_name.len); -+ buf[dentry->d_name.len] = 0; -+ -+ if (strcmp(buf, ".") == 0) -+ ino = dir->i_ino; -+ else if (strcmp(buf, "..") == 0) -+ ino = EXT3_ROOT_INO; -+ else -+ ino = simple_strtoul(buf, 0, 0); -+ -+ if ((ino != EXT3_ROOT_INO && -+ //ino != EXT3_ACL_IDX_INO && -+ //ino != EXT3_ACL_DATA_INO && -+ ino < EXT3_FIRST_INO(dir->i_sb)) || -+ ino > le32_to_cpu(EXT3_SB(dir->i_sb)->s_es->s_inodes_count)) -+ return ERR_PTR(-ENOENT); -+ -+ inode = iget(dir->i_sb, ino); -+ if (!inode) -+ return ERR_PTR(-EACCES); -+ if (is_bad_inode(inode)) { -+ iput(inode); -+ return ERR_PTR(-ENOENT); -+ } -+ -+ assert(list_empty(&dentry->d_alias)); /* d_instantiate */ -+ assert(d_unhashed(dentry)); /* d_rehash */ -+ -+ /* preferrably return a connected dentry */ -+ spin_lock(&dcache_lock); -+ list_for_each(lp, &inode->i_dentry) { -+ alternate = list_entry(lp, struct dentry, d_alias); -+ assert(!(alternate->d_flags & DCACHE_DISCONNECTED)); -+ } -+ -+ if (!list_empty(&inode->i_dentry)) { -+ alternate = list_entry(inode->i_dentry.next, -+ struct dentry, d_alias); -+ dget_locked(alternate); -+ spin_lock(&alternate->d_lock); -+ alternate->d_flags |= DCACHE_REFERENCED; -+ spin_unlock(&alternate->d_lock); -+ iput(inode); -+ spin_unlock(&dcache_lock); -+ return alternate; -+ } -+ dentry->d_flags |= DCACHE_DISCONNECTED; -+ -+ /* d_add(), but don't drop dcache_lock before adding dentry to inode */ -+ list_add(&dentry->d_alias, &inode->i_dentry); /* d_instantiate */ -+ dentry->d_inode = inode; -+ -+ __d_rehash(dentry, 0); /* d_rehash */ -+ spin_unlock(&dcache_lock); -+ -+ return NULL; -+} -+ -+#define do_switch(x,y) do { \ -+ 
__typeof__ (x) __tmp = x; \ -+ x = y; y = __tmp; } while (0) -+ -+static inline void switch_names(struct dentry *dentry, struct dentry *target) -+{ -+ const unsigned char *old_name, *new_name; -+ -+ memcpy(dentry->d_iname, target->d_iname, DNAME_INLINE_LEN_MIN); -+ old_name = target->d_name.name; -+ new_name = dentry->d_name.name; -+ if (old_name == target->d_iname) -+ old_name = dentry->d_iname; -+ if (new_name == dentry->d_iname) -+ new_name = target->d_iname; -+ target->d_name.name = new_name; -+ dentry->d_name.name = old_name; -+} -+ -+/* This function is spliced into ext3_lookup and does the move of a -+ * disconnected dentry (if it exists) to a connected dentry. -+ */ -+struct dentry *iopen_connect_dentry(struct dentry *dentry, struct inode *inode, -+ int rehash) -+{ -+ struct dentry *tmp, *goal = NULL; -+ struct list_head *lp; -+ -+ /* verify this dentry is really new */ -+ assert(dentry->d_inode == NULL); -+ assert(list_empty(&dentry->d_alias)); /* d_instantiate */ -+ if (rehash) -+ assert(d_unhashed(dentry)); /* d_rehash */ -+ assert(list_empty(&dentry->d_subdirs)); -+ -+ spin_lock(&dcache_lock); -+ if (!inode) -+ goto do_rehash; -+ -+ /* preferrably return a connected dentry */ -+ list_for_each(lp, &inode->i_dentry) { -+ tmp = list_entry(lp, struct dentry, d_alias); -+ if (tmp->d_flags & DCACHE_DISCONNECTED) { -+ assert(tmp->d_alias.next == &inode->i_dentry); -+ assert(tmp->d_alias.prev == &inode->i_dentry); -+ goal = tmp; -+ dget_locked(goal); -+ break; -+ } -+ } -+ -+ if (!goal) -+ goto do_instantiate; -+ -+ /* Move the goal to the de hash queue */ -+ goal->d_flags &= ~ DCACHE_DISCONNECTED; -+ security_d_instantiate(goal, inode); -+ __d_rehash(dentry, 0); -+ __d_move(goal, dentry); -+ spin_unlock(&dcache_lock); -+ iput(inode); -+ -+ return goal; -+ -+ /* d_add(), but don't drop dcache_lock before adding dentry to inode */ -+do_instantiate: -+ list_add(&dentry->d_alias, &inode->i_dentry); /* d_instantiate */ -+ dentry->d_inode = inode; -+do_rehash: -+ if 
(rehash) -+ __d_rehash(dentry, 0); /* d_rehash */ -+ spin_unlock(&dcache_lock); -+ -+ return NULL; -+} -+ -+/* -+ * These are the special structures for the iopen pseudo directory. -+ */ -+ -+static struct inode_operations iopen_inode_operations = { -+ lookup: iopen_lookup, /* BKL held */ -+}; -+ -+static struct file_operations iopen_file_operations = { -+ read: generic_read_dir, -+}; -+ -+static int match_dentry(struct dentry *dentry, const char *name) -+{ -+ int len; -+ -+ len = strlen(name); -+ if (dentry->d_name.len != len) -+ return 0; -+ if (strncmp(dentry->d_name.name, name, len)) -+ return 0; -+ return 1; -+} -+ -+/* -+ * This function is spliced into ext3_lookup and returns 1 the file -+ * name is __iopen__ and dentry has been filled in appropriately. -+ */ -+int ext3_check_for_iopen(struct inode *dir, struct dentry *dentry) -+{ -+ struct inode *inode; -+ -+ if (dir->i_ino != EXT3_ROOT_INO || -+ !test_opt(dir->i_sb, IOPEN) || -+ !match_dentry(dentry, "__iopen__")) -+ return 0; -+ -+ inode = iget(dir->i_sb, EXT3_BAD_INO); -+ -+ if (!inode) -+ return 0; -+ d_add(dentry, inode); -+ return 1; -+} -+ -+/* -+ * This function is spliced into read_inode; it returns 1 if inode -+ * number is the one for /__iopen__, in which case the inode is filled -+ * in appropriately. Otherwise, this fuction returns 0. 
-+ */ -+int ext3_iopen_get_inode(struct inode *inode) -+{ -+ if (inode->i_ino != EXT3_BAD_INO) -+ return 0; -+ -+ inode->i_mode = S_IFDIR | S_IRUSR | S_IXUSR; -+ if (test_opt(inode->i_sb, IOPEN_NOPRIV)) -+ inode->i_mode |= 0777; -+ inode->i_uid = 0; -+ inode->i_gid = 0; -+ inode->i_nlink = 1; -+ inode->i_size = 4096; -+ inode->i_atime = CURRENT_TIME; -+ inode->i_ctime = CURRENT_TIME; -+ inode->i_mtime = CURRENT_TIME; -+ EXT3_I(inode)->i_dtime = 0; -+ inode->i_blksize = PAGE_SIZE; /* This is the optimal IO size -+ * (for stat), not the fs block -+ * size */ -+ inode->i_blocks = 0; -+ inode->i_version = 1; -+ inode->i_generation = 0; -+ -+ inode->i_op = &iopen_inode_operations; -+ inode->i_fop = &iopen_file_operations; -+ inode->i_mapping->a_ops = 0; -+ -+ return 1; -+} -Index: linux-stage/fs/ext3/iopen.h -=================================================================== ---- linux-stage.orig/fs/ext3/iopen.h 1969-12-31 19:00:00.000000000 -0500 -+++ linux-stage/fs/ext3/iopen.h 2004-06-22 21:13:20.000000000 -0400 -@@ -0,0 +1,15 @@ -+/* -+ * iopen.h -+ * -+ * Special support for opening files by inode number. -+ * -+ * Copyright (C) 2001 by Theodore Ts'o (tytso@alum.mit.edu). -+ * -+ * This file may be redistributed under the terms of the GNU General -+ * Public License. 
-+ */ -+ -+extern int ext3_check_for_iopen(struct inode *dir, struct dentry *dentry); -+extern int ext3_iopen_get_inode(struct inode *inode); -+extern struct dentry *iopen_connect_dentry(struct dentry *dentry, -+ struct inode *inode, int rehash); -Index: linux-stage/fs/ext3/namei.c -=================================================================== ---- linux-stage.orig/fs/ext3/namei.c 2004-06-22 21:12:16.000000000 -0400 -+++ linux-stage/fs/ext3/namei.c 2004-06-22 21:13:20.000000000 -0400 -@@ -37,6 +37,7 @@ - #include - #include - #include "xattr.h" -+#include "iopen.h" - #include "acl.h" - - /* -@@ -979,6 +980,9 @@ - if (dentry->d_name.len > EXT3_NAME_LEN) - return ERR_PTR(-ENAMETOOLONG); - -+ if (ext3_check_for_iopen(dir, dentry)) -+ return NULL; -+ - bh = ext3_find_entry(dentry, &de); - inode = NULL; - if (bh) { -@@ -989,10 +993,8 @@ - if (!inode) - return ERR_PTR(-EACCES); - } -- if (inode) -- return d_splice_alias(inode, dentry); -- d_add(dentry, inode); -- return NULL; -+ -+ return iopen_connect_dentry(dentry, inode, 1); - } - - -@@ -2022,10 +2024,6 @@ - inode->i_nlink); - inode->i_version++; - inode->i_nlink = 0; -- /* There's no need to set i_disksize: the fact that i_nlink is -- * zero will ensure that the right thing happens during any -- * recovery. 
*/ -- inode->i_size = 0; - ext3_orphan_add(handle, inode); - inode->i_ctime = dir->i_ctime = dir->i_mtime = CURRENT_TIME; - ext3_mark_inode_dirty(handle, inode); -@@ -2145,6 +2143,23 @@ - return err; - } - -+/* Like ext3_add_nondir() except for call to iopen_connect_dentry */ -+static int ext3_add_link(handle_t *handle, struct dentry *dentry, -+ struct inode *inode) -+{ -+ int err = ext3_add_entry(handle, dentry, inode); -+ if (!err) { -+ err = ext3_mark_inode_dirty(handle, inode); -+ if (err == 0) { -+ dput(iopen_connect_dentry(dentry, inode, 0)); -+ return 0; -+ } -+ } -+ ext3_dec_count(handle, inode); -+ iput(inode); -+ return err; -+} -+ - static int ext3_link (struct dentry * old_dentry, - struct inode * dir, struct dentry *dentry) - { -@@ -2167,7 +2182,8 @@ - ext3_inc_count(handle, inode); - atomic_inc(&inode->i_count); - -- err = ext3_add_nondir(handle, dentry, inode); -+ err = ext3_add_link(handle, dentry, inode); -+ ext3_orphan_del(handle,inode); - ext3_journal_stop(handle); - return err; - } -Index: linux-stage/fs/ext3/super.c -=================================================================== ---- linux-stage.orig/fs/ext3/super.c 2004-06-22 21:12:16.000000000 -0400 -+++ linux-stage/fs/ext3/super.c 2004-06-22 21:13:51.000000000 -0400 -@@ -583,6 +583,7 @@ - Opt_abort, Opt_data_journal, Opt_data_ordered, Opt_data_writeback, - Opt_usrjquota, Opt_grpjquota, Opt_offusrjquota, Opt_offgrpjquota, - Opt_jqfmt_vfsold, Opt_jqfmt_vfsv0, -+ Opt_iopen, Opt_noiopen, Opt_iopen_nopriv, - Opt_ignore, Opt_err, - }; - -@@ -628,6 +629,9 @@ - {Opt_ignore, "noquota"}, - {Opt_ignore, "quota"}, - {Opt_ignore, "usrquota"}, -+ {Opt_iopen, "iopen"}, -+ {Opt_noiopen, "noiopen"}, -+ {Opt_iopen_nopriv, "iopen_nopriv"}, - {Opt_err, NULL} - }; - -@@ -893,6 +897,18 @@ - case Opt_abort: - set_opt(sbi->s_mount_opt, ABORT); - break; -+ case Opt_iopen: -+ set_opt (sbi->s_mount_opt, IOPEN); -+ clear_opt (sbi->s_mount_opt, IOPEN_NOPRIV); -+ break; -+ case Opt_noiopen: -+ clear_opt 
(sbi->s_mount_opt, IOPEN); -+ clear_opt (sbi->s_mount_opt, IOPEN_NOPRIV); -+ break; -+ case Opt_iopen_nopriv: -+ set_opt (sbi->s_mount_opt, IOPEN); -+ set_opt (sbi->s_mount_opt, IOPEN_NOPRIV); -+ break; - case Opt_ignore: - break; - default: -Index: linux-stage/include/linux/ext3_fs.h -=================================================================== ---- linux-stage.orig/include/linux/ext3_fs.h 2004-06-22 21:12:16.000000000 -0400 -+++ linux-stage/include/linux/ext3_fs.h 2004-06-22 21:13:20.000000000 -0400 -@@ -325,6 +325,8 @@ - #define EXT3_MOUNT_NO_UID32 0x2000 /* Disable 32-bit UIDs */ - #define EXT3_MOUNT_XATTR_USER 0x4000 /* Extended user attributes */ - #define EXT3_MOUNT_POSIX_ACL 0x8000 /* POSIX Access Control Lists */ -+#define EXT3_MOUNT_IOPEN 0x40000 /* Allow access via iopen */ -+#define EXT3_MOUNT_IOPEN_NOPRIV 0x80000 /* Make iopen world-readable */ - - /* Compatibility, for having both ext2_fs.h and ext3_fs.h included at once */ - #ifndef _LINUX_EXT2_FS_H diff --git a/lustre/kernel_patches/patches/kernel_text_address-2.4.18-chaos.patch b/lustre/kernel_patches/patches/kernel_text_address-2.4.18-chaos.patch deleted file mode 100644 index bcfc372..0000000 --- a/lustre/kernel_patches/patches/kernel_text_address-2.4.18-chaos.patch +++ /dev/null @@ -1,40 +0,0 @@ -Index: linux-2.4.18-chaos/arch/i386/kernel/traps.c -=================================================================== ---- linux-2.4.18-chaos.orig/arch/i386/kernel/traps.c 2003-07-28 17:51:13.000000000 +0400 -+++ linux-2.4.18-chaos/arch/i386/kernel/traps.c 2003-12-11 02:10:33.000000000 +0300 -@@ -1153,3 +1153,35 @@ - EXPORT_SYMBOL_GPL(nmi_watchdog); - #endif - -+#ifdef CONFIG_MODULES -+extern struct module *module_list; -+extern struct module kernel_module; -+#endif -+ -+int is_kernel_text_address(unsigned long addr) -+{ -+ int retval = 0; -+#ifdef CONFIG_MODULES -+ struct module *mod; -+#endif -+ if (addr >= (unsigned long) &_stext && -+ addr <= (unsigned long) &_etext); -+ return 1; -+ 
-+#ifdef CONFIG_MODULES -+ for (mod = module_list; mod != &kernel_module; mod = mod->next) { -+ /* mod_bound tests for addr being inside the vmalloc'ed -+ * module area. Of course it'd be better to test only -+ * for the .text subset... */ -+ if (mod_bound(addr, 0, mod)) { -+ retval = 1; -+ break; -+ } -+ } -+#endif -+ -+ return retval; -+} -+ -+EXPORT_SYMBOL(lookup_symbol); -+EXPORT_SYMBOL_GPL(is_kernel_text_address); diff --git a/lustre/kernel_patches/patches/kernel_text_address-2.4.20-rh.patch b/lustre/kernel_patches/patches/kernel_text_address-2.4.20-rh.patch deleted file mode 100644 index 6e78be1..0000000 --- a/lustre/kernel_patches/patches/kernel_text_address-2.4.20-rh.patch +++ /dev/null @@ -1,68 +0,0 @@ -Index: linux-2.4.20-20.9/kernel/kksymoops.c -=================================================================== ---- linux-2.4.20-20.9.orig/kernel/kksymoops.c 2003-08-18 23:16:51.000000000 +0800 -+++ linux-2.4.20-20.9/kernel/kksymoops.c 2003-11-06 18:38:12.000000000 +0800 -@@ -80,3 +80,5 @@ - printk("%s\n",modlist); - #endif - } -+ -+EXPORT_SYMBOL(lookup_symbol); -Index: linux-2.4.20-20.9/kernel/Makefile -=================================================================== ---- linux-2.4.20-20.9.orig/kernel/Makefile 2003-11-06 18:35:56.000000000 +0800 -+++ linux-2.4.20-20.9/kernel/Makefile 2003-11-06 18:42:57.000000000 +0800 -@@ -9,7 +9,7 @@ - - O_TARGET := kernel.o - --export-objs = signal.o sys.o kmod.o context.o ksyms.o pm.o exec_domain.o printk.o cpufreq.o profile.o -+export-objs = signal.o sys.o kmod.o context.o ksyms.o pm.o exec_domain.o printk.o cpufreq.o profile.o kksymoops.o - - obj-y = sched.o dma.o fork.o exec_domain.o panic.o printk.o lowlat.o profile.o \ - module.o exit.o itimer.o info.o time.o softirq.o resource.o \ -Index: linux-2.4.20-20.9/arch/i386/kernel/traps.c -=================================================================== ---- linux-2.4.20-20.9.orig/arch/i386/kernel/traps.c 2003-11-06 18:35:56.000000000 +0800 -+++ 
linux-2.4.20-20.9/arch/i386/kernel/traps.c 2003-11-06 18:43:26.000000000 +0800 -@@ -1027,9 +1019,39 @@ - #endif - } - -+#ifdef CONFIG_MODULES -+extern struct module *module_list; -+extern struct module kernel_module; -+#endif -+ -+int is_kernel_text_address(unsigned long addr) -+{ -+ int retval = 0; -+#ifdef CONFIG_MODULES -+ struct module *mod; -+#endif -+ if (addr >= (unsigned long) &_stext && -+ addr <= (unsigned long) &_etext); -+ return 1; -+ -+#ifdef CONFIG_MODULES -+ for (mod = module_list; mod != &kernel_module; mod = mod->next) { -+ /* mod_bound tests for addr being inside the vmalloc'ed -+ * module area. Of course it'd be better to test only -+ * for the .text subset... */ -+ if (mod_bound(addr, 0, mod)) { -+ retval = 1; -+ break; -+ } -+ } -+#endif -+ -+ return retval; -+} -+ - EXPORT_SYMBOL_GPL(netdump_func); - EXPORT_SYMBOL_GPL(netdump_mode); - #if CONFIG_X86_LOCAL_APIC - EXPORT_SYMBOL_GPL(nmi_watchdog); - #endif -- -+EXPORT_SYMBOL_GPL(is_kernel_text_address); diff --git a/lustre/kernel_patches/patches/kernel_text_address-2.4.20-vanilla.patch b/lustre/kernel_patches/patches/kernel_text_address-2.4.20-vanilla.patch deleted file mode 100644 index 1fe56e1..0000000 --- a/lustre/kernel_patches/patches/kernel_text_address-2.4.20-vanilla.patch +++ /dev/null @@ -1,116 +0,0 @@ -Index: linux-2.4.20-uml/arch/um/kernel/Makefile -=================================================================== ---- linux-2.4.20-uml.orig/arch/um/kernel/Makefile 2003-11-10 13:42:48.000000000 +0800 -+++ linux-2.4.20-uml/arch/um/kernel/Makefile 2003-11-10 13:43:06.000000000 +0800 -@@ -37,7 +37,8 @@ - export-objs-$(CONFIG_GPROF) += gprof_syms.o - export-objs-$(CONFIG_GCOV) += gmon_syms.o - --export-objs = ksyms.o process_kern.o signal_kern.o user_syms.o $(export-objs-y) -+export-objs = ksyms.o process_kern.o signal_kern.o user_syms.o sysrq.o \ -+ $(export-objs-y) - - CFLAGS_user_syms.o = -D__AUTOCONF_INCLUDED__ $(DMODULES-y) $(DMODVERSIONS-y) \ - -I/usr/include -I../include -Index: 
linux-2.4.20-uml/arch/um/kernel/sysrq.c -=================================================================== ---- linux-2.4.20-uml.orig/arch/um/kernel/sysrq.c 2003-11-10 13:42:49.000000000 +0800 -+++ linux-2.4.20-uml/arch/um/kernel/sysrq.c 2003-11-10 13:43:06.000000000 +0800 -@@ -86,6 +86,37 @@ - show_trace((unsigned long *)esp); - } - -+#ifdef CONFIG_MODULES -+extern struct module *module_list; -+extern struct module kernel_module; -+#endif -+ -+int is_kernel_text_address(unsigned long addr) -+{ -+ int retval = 0; -+#ifdef CONFIG_MODULES -+ struct module *mod; -+#endif -+ if (addr >= (unsigned long) &_stext && -+ addr <= (unsigned long) &_etext) -+ return 1; -+ -+#ifdef CONFIG_MODULES -+ for (mod = module_list; mod != &kernel_module; mod = mod->next) { -+ /* mod_bound tests for addr being inside the vmalloc'ed -+ * module area. Of course it'd be better to test only -+ * for the .text subset... */ -+ if (mod_bound(addr, 0, mod)) { -+ retval = 1; -+ break; -+ } -+ } -+#endif -+ return retval; -+} -+ -+EXPORT_SYMBOL(is_kernel_text_address); -+ - /* - * Overrides for Emacs so that we follow Linus's tabbing style. 
- * Emacs will notice this stuff at the end of the file and automatically -Index: linux-2.4.20-uml/arch/i386/kernel/Makefile -=================================================================== ---- linux-2.4.20-uml.orig/arch/i386/kernel/Makefile 2002-11-29 07:53:09.000000000 +0800 -+++ linux-2.4.20-uml/arch/i386/kernel/Makefile 2003-11-10 14:39:28.000000000 +0800 -@@ -14,7 +14,8 @@ - - O_TARGET := kernel.o - --export-objs := mca.o mtrr.o msr.o cpuid.o microcode.o i386_ksyms.o time.o -+export-objs := mca.o mtrr.o msr.o cpuid.o microcode.o i386_ksyms.o time.o \ -+ traps.o - - obj-y := process.o semaphore.o signal.o entry.o traps.o irq.o vm86.o \ - ptrace.o i8259.o ioport.o ldt.o setup.o time.o sys_i386.o \ -Index: linux-2.4.20-uml/arch/i386/kernel/traps.c -=================================================================== ---- linux-2.4.20-uml.orig/arch/i386/kernel/traps.c 2002-11-29 07:53:09.000000000 +0800 -+++ linux-2.4.20-uml/arch/i386/kernel/traps.c 2003-11-10 14:53:53.000000000 +0800 -@@ -1004,3 +1004,41 @@ - cobalt_init(); - #endif - } -+ -+#ifdef CONFIG_MODULES -+extern struct module *module_list; -+extern struct module kernel_module; -+#endif -+ -+int is_kernel_text_address(unsigned long addr) -+{ -+ int retval = 0; -+#ifdef CONFIG_MODULES -+ struct module *mod; -+#endif -+ if (addr >= (unsigned long) &_stext && -+ addr <= (unsigned long) &_etext); -+ return 1; -+ -+#ifdef CONFIG_MODULES -+ for (mod = module_list; mod != &kernel_module; mod = mod->next) { -+ /* mod_bound tests for addr being inside the vmalloc'ed -+ * module area. Of course it'd be better to test only -+ * for the .text subset... 
*/ -+ if (mod_bound(addr, 0, mod)) { -+ retval = 1; -+ break; -+ } -+ } -+#endif -+ -+ return retval; -+} -+ -+int lookup_symbol(unsigned long address, char *buf, int buflen) -+{ -+ return -ENOSYS; -+} -+ -+EXPORT_SYMBOL_GPL(is_kernel_text_address); -+EXPORT_SYMBOL_GPL(lookup_symbol); diff --git a/lustre/kernel_patches/patches/kernel_text_address-2.4.22-vanilla.patch b/lustre/kernel_patches/patches/kernel_text_address-2.4.22-vanilla.patch deleted file mode 100644 index 6e9cd93..0000000 --- a/lustre/kernel_patches/patches/kernel_text_address-2.4.22-vanilla.patch +++ /dev/null @@ -1,59 +0,0 @@ -Index: linux-2.4.22-vanilla/arch/i386/kernel/traps.c -=================================================================== ---- linux-2.4.22-vanilla.orig/arch/i386/kernel/traps.c 2003-05-16 05:28:59.000000000 +0400 -+++ linux-2.4.22-vanilla/arch/i386/kernel/traps.c 2003-11-14 00:35:36.000000000 +0300 -@@ -1004,3 +1004,40 @@ - cobalt_init(); - #endif - } -+#ifdef CONFIG_MODULES -+extern struct module *module_list; -+extern struct module kernel_module; -+#endif -+ -+int is_kernel_text_address(unsigned long addr) -+{ -+ int retval = 0; -+#ifdef CONFIG_MODULES -+ struct module *mod; -+#endif -+ if (addr >= (unsigned long) &_stext && -+ addr <= (unsigned long) &_etext); -+ return 1; -+ -+#ifdef CONFIG_MODULES -+ for (mod = module_list; mod != &kernel_module; mod = mod->next) { -+ /* mod_bound tests for addr being inside the vmalloc'ed -+ * module area. Of course it'd be better to test only -+ * for the .text subset... 
*/ -+ if (mod_bound(addr, 0, mod)) { -+ retval = 1; -+ break; -+ } -+ } -+#endif -+ -+ return retval; -+} -+ -+int lookup_symbol(unsigned long address, char *buf, int buflen) -+{ -+ return -ENOSYS; -+} -+ -+EXPORT_SYMBOL_GPL(is_kernel_text_address); -+EXPORT_SYMBOL_GPL(lookup_symbol); -Index: linux-2.4.22-vanilla/arch/i386/kernel/Makefile -=================================================================== ---- linux-2.4.22-vanilla.orig/arch/i386/kernel/Makefile 2003-11-03 23:22:06.000000000 +0300 -+++ linux-2.4.22-vanilla/arch/i386/kernel/Makefile 2003-11-14 00:30:28.000000000 +0300 -@@ -14,7 +14,8 @@ - - O_TARGET := kernel.o - --export-objs := mca.o mtrr.o msr.o cpuid.o microcode.o i386_ksyms.o time.o setup.o -+export-objs := mca.o mtrr.o msr.o cpuid.o microcode.o i386_ksyms.o time.o \ -+ setup.o traps.o - - obj-y := process.o semaphore.o signal.o entry.o traps.o irq.o vm86.o \ - ptrace.o i8259.o ioport.o ldt.o setup.o time.o sys_i386.o \ diff --git a/lustre/kernel_patches/patches/lookup_bdev_init_intent.patch b/lustre/kernel_patches/patches/lookup_bdev_init_intent.patch deleted file mode 100644 index 06f7939..0000000 --- a/lustre/kernel_patches/patches/lookup_bdev_init_intent.patch +++ /dev/null @@ -1,11 +0,0 @@ -diff -rupN linux-2.6.6.old/fs/block_dev.c linux-2.6.6.new/fs/block_dev.c ---- linux-2.6.6.old/fs/block_dev.c Mon May 10 05:33:22 2004 -+++ linux-2.6.6.new/fs/block_dev.c Thu Jun 24 20:34:45 2004 -@@ -834,6 +834,7 @@ struct block_device *lookup_bdev(const c - if (!path || !*path) - return ERR_PTR(-EINVAL); - -+ intent_init(&nd.intent, IT_LOOKUP); - error = path_lookup(path, LOOKUP_FOLLOW, &nd); - if (error) - return ERR_PTR(error); diff --git a/lustre/kernel_patches/patches/lustre_version.patch b/lustre/kernel_patches/patches/lustre_version.patch index b478e8a..0737286 100644 --- a/lustre/kernel_patches/patches/lustre_version.patch +++ b/lustre/kernel_patches/patches/lustre_version.patch @@ -1,3 +1,4 @@ +Version 42: export show_task() Version 41: 
revalidate should check working dir is a directory (b=4134) Version 40: >32000 subdirectories support for ext3 (b=3244) Version 39: add EXPORT_SYMBOL(smp_num_siblings) to vanilla-2.4.24 (b=3966) @@ -15,6 +16,6 @@ Version 34: ext3 iopen assertion (b=2517), operations on deleted "." (b=2399) --- /dev/null Fri Aug 30 17:31:37 2002 +++ linux-2.4.18-18.8.0-l12-braam/include/linux/lustre_version.h Thu Feb 13 07:58:33 2003 @@ -0,0 +1 @@ -+#define LUSTRE_KERNEL_VERSION 39 ++#define LUSTRE_KERNEL_VERSION 42 _ diff --git a/lustre/kernel_patches/patches/nfs-cifs-intent-2.6-vanilla.patch b/lustre/kernel_patches/patches/nfs-cifs-intent-2.6-vanilla.patch deleted file mode 100644 index 0c4f64b..0000000 --- a/lustre/kernel_patches/patches/nfs-cifs-intent-2.6-vanilla.patch +++ /dev/null @@ -1,117 +0,0 @@ -Index: linux-2.6.7-vanilla/fs/nfs/dir.c -=================================================================== ---- linux-2.6.7-vanilla.orig/fs/nfs/dir.c 2004-07-01 12:24:53.588555336 +0300 -+++ linux-2.6.7-vanilla/fs/nfs/dir.c 2004-07-01 12:37:42.595648488 +0300 -@@ -778,7 +778,7 @@ - if (nd->flags & LOOKUP_DIRECTORY) - return 0; - /* Are we trying to write to a read only partition? 
*/ -- if (IS_RDONLY(dir) && (nd->intent.open.flags & (O_CREAT|O_TRUNC|FMODE_WRITE))) -+ if (IS_RDONLY(dir) && (nd->intent.it_flags & (O_CREAT|O_TRUNC|FMODE_WRITE))) - return 0; - return 1; - } -@@ -799,7 +799,7 @@ - dentry->d_op = NFS_PROTO(dir)->dentry_ops; - - /* Let vfs_create() deal with O_EXCL */ -- if (nd->intent.open.flags & O_EXCL) -+ if (nd->intent.it_flags & O_EXCL) - goto no_entry; - - /* Open the file on the server */ -@@ -807,7 +807,7 @@ - /* Revalidate parent directory attribute cache */ - nfs_revalidate_inode(NFS_SERVER(dir), dir); - -- if (nd->intent.open.flags & O_CREAT) { -+ if (nd->intent.it_flags & O_CREAT) { - nfs_begin_data_update(dir); - inode = nfs4_atomic_open(dir, dentry, nd); - nfs_end_data_update(dir); -@@ -823,7 +823,7 @@ - break; - /* This turned out not to be a regular file */ - case -ELOOP: -- if (!(nd->intent.open.flags & O_NOFOLLOW)) -+ if (!(nd->intent.it_flags & O_NOFOLLOW)) - goto no_open; - /* case -EISDIR: */ - /* case -EINVAL: */ -@@ -857,7 +857,7 @@ - dir = parent->d_inode; - if (!is_atomic_open(dir, nd)) - goto no_open; -- openflags = nd->intent.open.flags; -+ openflags = nd->intent.it_flags; - if (openflags & O_CREAT) { - /* If this is a negative dentry, just drop it */ - if (!inode) -Index: linux-2.6.7-vanilla/fs/nfs/nfs4proc.c -=================================================================== ---- linux-2.6.7-vanilla.orig/fs/nfs/nfs4proc.c 2004-07-01 12:09:22.000000000 +0300 -+++ linux-2.6.7-vanilla/fs/nfs/nfs4proc.c 2004-07-01 12:37:42.598648032 +0300 -@@ -475,17 +475,17 @@ - struct nfs4_state *state; - - if (nd->flags & LOOKUP_CREATE) { -- attr.ia_mode = nd->intent.open.create_mode; -+ attr.ia_mode = nd->intent.it_create_mode; - attr.ia_valid = ATTR_MODE; - if (!IS_POSIXACL(dir)) - attr.ia_mode &= ~current->fs->umask; - } else { - attr.ia_valid = 0; -- BUG_ON(nd->intent.open.flags & O_CREAT); -+ BUG_ON(nd->intent.it_flags & O_CREAT); - } - - cred = rpcauth_lookupcred(NFS_SERVER(dir)->client->cl_auth, 0); -- state = 
nfs4_do_open(dir, &dentry->d_name, nd->intent.open.flags, &attr, cred); -+ state = nfs4_do_open(dir, &dentry->d_name, nd->intent.it_flags, &attr, cred); - put_rpccred(cred); - if (IS_ERR(state)) - return (struct inode *)state; -Index: linux-2.6.7-vanilla/fs/cifs/dir.c -=================================================================== ---- linux-2.6.7-vanilla.orig/fs/cifs/dir.c 2004-07-01 12:09:20.000000000 +0300 -+++ linux-2.6.7-vanilla/fs/cifs/dir.c 2004-07-01 12:38:17.695312528 +0300 -@@ -175,23 +175,23 @@ - } - - if(nd) { -- if ((nd->intent.open.flags & O_ACCMODE) == O_RDONLY) -+ if ((nd->intent.it_flags & O_ACCMODE) == O_RDONLY) - desiredAccess = GENERIC_READ; -- else if ((nd->intent.open.flags & O_ACCMODE) == O_WRONLY) { -+ else if ((nd->intent.it_flags & O_ACCMODE) == O_WRONLY) { - desiredAccess = GENERIC_WRITE; - write_only = TRUE; -- } else if ((nd->intent.open.flags & O_ACCMODE) == O_RDWR) { -+ } else if ((nd->intent.it_flags & O_ACCMODE) == O_RDWR) { - /* GENERIC_ALL is too much permission to request */ - /* can cause unnecessary access denied on create */ - /* desiredAccess = GENERIC_ALL; */ - desiredAccess = GENERIC_READ | GENERIC_WRITE; - } - -- if((nd->intent.open.flags & (O_CREAT | O_EXCL)) == (O_CREAT | O_EXCL)) -+ if((nd->intent.it_flags & (O_CREAT | O_EXCL)) == (O_CREAT | O_EXCL)) - disposition = FILE_CREATE; -- else if((nd->intent.open.flags & (O_CREAT | O_TRUNC)) == (O_CREAT | O_TRUNC)) -+ else if((nd->intent.it_flags & (O_CREAT | O_TRUNC)) == (O_CREAT | O_TRUNC)) - disposition = FILE_OVERWRITE_IF; -- else if((nd->intent.open.flags & O_CREAT) == O_CREAT) -+ else if((nd->intent.it_flags & O_CREAT) == O_CREAT) - disposition = FILE_OPEN_IF; - else { - cFYI(1,("Create flag not set in create function")); -@@ -361,7 +361,7 @@ - parent_dir_inode, direntry->d_name.name, direntry)); - - if(nd) { /* BB removeme */ -- cFYI(1,("In lookup nd flags 0x%x open intent flags 0x%x",nd->flags,nd->intent.open.flags)); -+ cFYI(1,("In lookup nd flags 0x%x open 
intent flags 0x%x",nd->flags,nd->intent.it_flags)); - } /* BB removeme BB */ - /* BB Add check of incoming data - e.g. frame not longer than maximum SMB - let server check the namelen BB */ - diff --git a/lustre/kernel_patches/patches/uml-2.6.7-01-bb2.patch b/lustre/kernel_patches/patches/uml-2.6.7-01-bb2.patch deleted file mode 100644 index 3ca2d14..0000000 --- a/lustre/kernel_patches/patches/uml-2.6.7-01-bb2.patch +++ /dev/null @@ -1,20388 +0,0 @@ -Index: uml-2.6.7/arch/um/kernel/skas/trap_user.c -=================================================================== ---- uml-2.6.7.orig/arch/um/kernel/skas/trap_user.c 2004-07-16 19:36:33.534048512 +0300 -+++ uml-2.6.7/arch/um/kernel/skas/trap_user.c 2004-07-16 19:47:23.734203064 +0300 -@@ -1,5 +1,5 @@ - /* -- * Copyright (C) 2002 Jeff Dike (jdike@karaya.com) -+ * Copyright (C) 2002 - 2003 Jeff Dike (jdike@addtoit.com) - * Licensed under the GPL - */ - -@@ -35,14 +35,10 @@ - errno = save_errno; - } - --extern int missed_ticks[]; -- - void user_signal(int sig, union uml_pt_regs *regs) - { - struct signal_info *info; - -- if(sig == SIGVTALRM) -- missed_ticks[cpu()]++; - regs->skas.is_user = 1; - regs->skas.fault_addr = 0; - regs->skas.fault_type = 0; -Index: uml-2.6.7/arch/um/kernel/irq.c -=================================================================== ---- uml-2.6.7.orig/arch/um/kernel/irq.c 2004-07-16 19:37:20.013982488 +0300 -+++ uml-2.6.7/arch/um/kernel/irq.c 2004-07-16 19:47:23.717205648 +0300 -@@ -29,6 +29,7 @@ - #include "user_util.h" - #include "kern_util.h" - #include "irq_user.h" -+#include "irq_kern.h" - - static void register_irq_proc (unsigned int irq); - -@@ -83,65 +84,55 @@ - end_none - }; - --/* Not changed */ --volatile unsigned long irq_err_count; -- - /* - * Generic, controller-independent functions: - */ - --int get_irq_list(char *buf) -+int show_interrupts(struct seq_file *p, void *v) - { -- int i, j; -- unsigned long flags; -+ int i = *(loff_t *) v, j; - struct irqaction * action; -- char *p = 
buf; -+ unsigned long flags; - -- p += sprintf(p, " "); -- for (j=0; jtypename); -- p += sprintf(p, " %s", action->name); -+ seq_printf(p, " %14s", irq_desc[i].handler->typename); -+ seq_printf(p, " %s", action->name); - - for (action=action->next; action; action = action->next) -- p += sprintf(p, ", %s", action->name); -- *p++ = '\n'; -- end: -+ seq_printf(p, ", %s", action->name); -+ -+ seq_putc(p, '\n'); -+skip: - spin_unlock_irqrestore(&irq_desc[i].lock, flags); -+ } else if (i == NR_IRQS) { -+ seq_printf(p, "NMI: "); -+ for (j = 0; j < NR_CPUS; j++) -+ if (cpu_online(j)) -+ seq_printf(p, "%10u ", nmi_count(j)); -+ seq_putc(p, '\n'); - } -- p += sprintf(p, "\n"); --#ifdef notdef --#ifdef CONFIG_SMP -- p += sprintf(p, "LOC: "); -- for (j = 0; j < num_online_cpus(); j++) -- p += sprintf(p, "%10u ", -- apic_timer_irqs[cpu_logical_map(j)]); -- p += sprintf(p, "\n"); --#endif --#endif -- p += sprintf(p, "ERR: %10lu\n", irq_err_count); -- return p - buf; --} -- - --int show_interrupts(struct seq_file *p, void *v) --{ -- return(0); -+ return 0; - } - - /* -@@ -282,13 +273,12 @@ - * 0 return value means that this irq is already being - * handled by some other CPU. 
(or is disabled) - */ -- int cpu = smp_processor_id(); - irq_desc_t *desc = irq_desc + irq; - struct irqaction * action; - unsigned int status; - - irq_enter(); -- kstat_cpu(cpu).irqs[irq]++; -+ kstat_this_cpu.irqs[irq]++; - spin_lock(&desc->lock); - desc->handler->ack(irq); - /* -@@ -385,7 +375,7 @@ - */ - - int request_irq(unsigned int irq, -- void (*handler)(int, void *, struct pt_regs *), -+ irqreturn_t (*handler)(int, void *, struct pt_regs *), - unsigned long irqflags, - const char * devname, - void *dev_id) -@@ -433,15 +423,19 @@ - EXPORT_SYMBOL(request_irq); - - int um_request_irq(unsigned int irq, int fd, int type, -- void (*handler)(int, void *, struct pt_regs *), -+ irqreturn_t (*handler)(int, void *, struct pt_regs *), - unsigned long irqflags, const char * devname, - void *dev_id) - { -- int retval; -+ int err; - -- retval = request_irq(irq, handler, irqflags, devname, dev_id); -- if(retval) return(retval); -- return(activate_fd(irq, fd, type, dev_id)); -+ err = request_irq(irq, handler, irqflags, devname, dev_id); -+ if(err) -+ return(err); -+ -+ if(fd != -1) -+ err = activate_fd(irq, fd, type, dev_id); -+ return(err); - } - - /* this was setup_x86_irq but it seems pretty generic */ -@@ -474,7 +468,8 @@ - */ - spin_lock_irqsave(&desc->lock,flags); - p = &desc->action; -- if ((old = *p) != NULL) { -+ old = *p; -+ if (old != NULL) { - /* Can't share interrupts unless both agree to */ - if (!(old->flags & new->flags & SA_SHIRQ)) { - spin_unlock_irqrestore(&desc->lock,flags); -@@ -586,12 +581,14 @@ - unsigned long count, void *data) - { - int irq = (long) data, full_count = count, err; -- cpumask_t new_value, tmp; -+ cpumask_t new_value; - - if (!irq_desc[irq].handler->set_affinity) - return -EIO; - - err = cpumask_parse(buffer, count, new_value); -+ if(err) -+ return(err); - - #ifdef CONFIG_SMP - /* -@@ -599,9 +596,11 @@ - * way to make the system unusable accidentally :-) At least - * one online CPU still has to be targeted. 
- */ -- cpus_and(tmp, new_value, cpu_online_map); -- if (cpus_empty(tmp)) -- return -EINVAL; -+ { cpumask_t tmp; -+ cpus_and(tmp, new_value, cpu_online_map); -+ if (cpus_empty(tmp)) -+ return -EINVAL; -+ } - #endif - - irq_affinity[irq] = new_value; -Index: uml-2.6.7/arch/um/drivers/chan_user.c -=================================================================== ---- uml-2.6.7.orig/arch/um/drivers/chan_user.c 2004-07-16 19:35:52.587273368 +0300 -+++ uml-2.6.7/arch/um/drivers/chan_user.c 2004-07-16 19:47:24.392103048 +0300 -@@ -1,5 +1,5 @@ - /* -- * Copyright (C) 2000, 2001 Jeff Dike (jdike@karaya.com) -+ * Copyright (C) 2000 - 2003 Jeff Dike (jdike@addtoit.com) - * Licensed under the GPL - */ - -@@ -7,7 +7,6 @@ - #include - #include - #include --#include - #include - #include - #include -@@ -22,33 +21,6 @@ - #include "choose-mode.h" - #include "mode.h" - --void generic_close(int fd, void *unused) --{ -- close(fd); --} -- --int generic_read(int fd, char *c_out, void *unused) --{ -- int n; -- -- n = read(fd, c_out, sizeof(*c_out)); -- if(n < 0){ -- if(errno == EAGAIN) return(0); -- return(-errno); -- } -- else if(n == 0) return(-EIO); -- return(1); --} -- --int generic_write(int fd, const char *buf, int n, void *unused) --{ -- int count; -- -- count = write(fd, buf, n); -- if(count < 0) return(-errno); -- return(count); --} -- - int generic_console_write(int fd, const char *buf, int n, void *unused) - { - struct termios save, new; -@@ -65,26 +37,6 @@ - return(err); - } - --int generic_window_size(int fd, void *unused, unsigned short *rows_out, -- unsigned short *cols_out) --{ -- struct winsize size; -- int ret = 0; -- -- if(ioctl(fd, TIOCGWINSZ, &size) == 0){ -- ret = ((*rows_out != size.ws_row) || -- (*cols_out != size.ws_col)); -- *rows_out = size.ws_row; -- *cols_out = size.ws_col; -- } -- return(ret); --} -- --void generic_free(void *data) --{ -- kfree(data); --} -- - static void winch_handler(int sig) - { - } -@@ -100,14 +52,16 @@ - struct winch_data *data = 
arg; - sigset_t sigs; - int pty_fd, pipe_fd; -+ int count, err; - char c = 1; - -- close(data->close_me); -+ os_close_file(data->close_me); - pty_fd = data->pty_fd; - pipe_fd = data->pipe_fd; -- if(write(pipe_fd, &c, sizeof(c)) != sizeof(c)) -+ count = os_write_file(pipe_fd, &c, sizeof(c)); -+ if(count != sizeof(c)) - printk("winch_thread : failed to write synchronization " -- "byte, errno = %d\n", errno); -+ "byte, err = %d\n", -count); - - signal(SIGWINCH, winch_handler); - sigfillset(&sigs); -@@ -123,26 +77,24 @@ - exit(1); - } - -- if(ioctl(pty_fd, TIOCSCTTY, 0) < 0){ -- printk("winch_thread : TIOCSCTTY failed, errno = %d\n", errno); -- exit(1); -- } -- if(tcsetpgrp(pty_fd, os_getpid()) < 0){ -- printk("winch_thread : tcsetpgrp failed, errno = %d\n", errno); -+ err = os_new_tty_pgrp(pty_fd, os_getpid()); -+ if(err < 0){ -+ printk("winch_thread : new_tty_pgrp failed, err = %d\n", -err); - exit(1); - } - -- if(read(pipe_fd, &c, sizeof(c)) != sizeof(c)) -+ count = os_read_file(pipe_fd, &c, sizeof(c)); -+ if(count != sizeof(c)) - printk("winch_thread : failed to read synchronization byte, " -- "errno = %d\n", errno); -+ "err = %d\n", -count); - - while(1){ - pause(); - -- if(write(pipe_fd, &c, sizeof(c)) != sizeof(c)){ -- printk("winch_thread : write failed, errno = %d\n", -- errno); -- } -+ count = os_write_file(pipe_fd, &c, sizeof(c)); -+ if(count != sizeof(c)) -+ printk("winch_thread : write failed, err = %d\n", -+ -count); - } - } - -@@ -154,8 +106,8 @@ - char c; - - err = os_pipe(fds, 1, 1); -- if(err){ -- printk("winch_tramp : os_pipe failed, errno = %d\n", -err); -+ if(err < 0){ -+ printk("winch_tramp : os_pipe failed, err = %d\n", -err); - return(err); - } - -@@ -168,12 +120,12 @@ - return(pid); - } - -- close(fds[1]); -+ os_close_file(fds[1]); - *fd_out = fds[0]; -- n = read(fds[0], &c, sizeof(c)); -+ n = os_read_file(fds[0], &c, sizeof(c)); - if(n != sizeof(c)){ - printk("winch_tramp : failed to read synchronization byte\n"); -- printk("read returned %d, 
errno = %d\n", n, errno); -+ printk("read failed, err = %d\n", -n); - printk("fd %d will not support SIGWINCH\n", fd); - *fd_out = -1; - } -@@ -183,20 +135,24 @@ - void register_winch(int fd, void *device_data) - { - int pid, thread, thread_fd; -+ int count; - char c = 1; - -- if(!isatty(fd)) return; -+ if(!isatty(fd)) -+ return; - - pid = tcgetpgrp(fd); -- if(!CHOOSE_MODE(is_tracer_winch(pid, fd, device_data), 0) && -- (pid == -1)){ -+ if(!CHOOSE_MODE_PROC(is_tracer_winch, is_skas_winch, pid, fd, -+ device_data) && (pid == -1)){ - thread = winch_tramp(fd, device_data, &thread_fd); - if(fd != -1){ - register_winch_irq(thread_fd, fd, thread, device_data); - -- if(write(thread_fd, &c, sizeof(c)) != sizeof(c)) -+ count = os_write_file(thread_fd, &c, sizeof(c)); -+ if(count != sizeof(c)) - printk("register_winch : failed to write " -- "synchronization byte\n"); -+ "synchronization byte, err = %d\n", -+ -count); - } - } - } -Index: uml-2.6.7/arch/um/drivers/cow_user.c -=================================================================== ---- uml-2.6.7.orig/arch/um/drivers/cow_user.c 2004-07-16 19:47:23.608222216 +0300 -+++ uml-2.6.7/arch/um/drivers/cow_user.c 2004-07-16 19:47:23.680211272 +0300 -@@ -0,0 +1,375 @@ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+ -+#include "os.h" -+ -+#include "cow.h" -+#include "cow_sys.h" -+ -+#define PATH_LEN_V1 256 -+ -+struct cow_header_v1 { -+ int magic; -+ int version; -+ char backing_file[PATH_LEN_V1]; -+ time_t mtime; -+ __u64 size; -+ int sectorsize; -+}; -+ -+#define PATH_LEN_V2 MAXPATHLEN -+ -+struct cow_header_v2 { -+ unsigned long magic; -+ unsigned long version; -+ char backing_file[PATH_LEN_V2]; -+ time_t mtime; -+ __u64 size; -+ int sectorsize; -+}; -+ -+/* Define PATH_LEN_V3 as the usual value of MAXPATHLEN, just hard-code it in -+ * case other systems have different values for MAXPATHLEN -+ */ -+#define PATH_LEN_V3 4096 -+ -+/* Changes from V2 - -+ * PATH_LEN_V3 as 
described above -+ * Explicitly specify field bit lengths for systems with different -+ * lengths for the usual C types. Not sure whether char or -+ * time_t should be changed, this can be changed later without -+ * breaking compatibility -+ * Add alignment field so that different alignments can be used for the -+ * bitmap and data -+ * Add cow_format field to allow for the possibility of different ways -+ * of specifying the COW blocks. For now, the only value is 0, -+ * for the traditional COW bitmap. -+ * Move the backing_file field to the end of the header. This allows -+ * for the possibility of expanding it into the padding required -+ * by the bitmap alignment. -+ * The bitmap and data portions of the file will be aligned as specified -+ * by the alignment field. This is to allow COW files to be -+ * put on devices with restrictions on access alignments, such as -+ * /dev/raw, with a 512 byte alignment restriction. This also -+ * allows the data to be more aligned more strictly than on -+ * sector boundaries. This is needed for ubd-mmap, which needs -+ * the data to be page aligned. -+ * Fixed (finally!) 
the rounding bug -+ */ -+ -+struct cow_header_v3 { -+ __u32 magic; -+ __u32 version; -+ time_t mtime; -+ __u64 size; -+ __u32 sectorsize; -+ __u32 alignment; -+ __u32 cow_format; -+ char backing_file[PATH_LEN_V3]; -+}; -+ -+/* COW format definitions - for now, we have only the usual COW bitmap */ -+#define COW_BITMAP 0 -+ -+union cow_header { -+ struct cow_header_v1 v1; -+ struct cow_header_v2 v2; -+ struct cow_header_v3 v3; -+}; -+ -+#define COW_MAGIC 0x4f4f4f4d /* MOOO */ -+#define COW_VERSION 3 -+ -+#define DIV_ROUND(x, len) (((x) + (len) - 1) / (len)) -+#define ROUND_UP(x, align) DIV_ROUND(x, align) * (align) -+ -+void cow_sizes(int version, __u64 size, int sectorsize, int align, -+ int bitmap_offset, unsigned long *bitmap_len_out, -+ int *data_offset_out) -+{ -+ if(version < 3){ -+ *bitmap_len_out = (size + sectorsize - 1) / (8 * sectorsize); -+ -+ *data_offset_out = bitmap_offset + *bitmap_len_out; -+ *data_offset_out = (*data_offset_out + sectorsize - 1) / -+ sectorsize; -+ *data_offset_out *= sectorsize; -+ } -+ else { -+ *bitmap_len_out = DIV_ROUND(size, sectorsize); -+ *bitmap_len_out = DIV_ROUND(*bitmap_len_out, 8); -+ -+ *data_offset_out = bitmap_offset + *bitmap_len_out; -+ *data_offset_out = ROUND_UP(*data_offset_out, align); -+ } -+} -+ -+static int absolutize(char *to, int size, char *from) -+{ -+ char save_cwd[256], *slash; -+ int remaining; -+ -+ if(getcwd(save_cwd, sizeof(save_cwd)) == NULL) { -+ cow_printf("absolutize : unable to get cwd - errno = %d\n", -+ errno); -+ return(-1); -+ } -+ slash = strrchr(from, '/'); -+ if(slash != NULL){ -+ *slash = '\0'; -+ if(chdir(from)){ -+ *slash = '/'; -+ cow_printf("absolutize : Can't cd to '%s' - " -+ "errno = %d\n", from, errno); -+ return(-1); -+ } -+ *slash = '/'; -+ if(getcwd(to, size) == NULL){ -+ cow_printf("absolutize : unable to get cwd of '%s' - " -+ "errno = %d\n", from, errno); -+ return(-1); -+ } -+ remaining = size - strlen(to); -+ if(strlen(slash) + 1 > remaining){ -+ cow_printf("absolutize 
: unable to fit '%s' into %d " -+ "chars\n", from, size); -+ return(-1); -+ } -+ strcat(to, slash); -+ } -+ else { -+ if(strlen(save_cwd) + 1 + strlen(from) + 1 > size){ -+ cow_printf("absolutize : unable to fit '%s' into %d " -+ "chars\n", from, size); -+ return(-1); -+ } -+ strcpy(to, save_cwd); -+ strcat(to, "/"); -+ strcat(to, from); -+ } -+ chdir(save_cwd); -+ return(0); -+} -+ -+int write_cow_header(char *cow_file, int fd, char *backing_file, -+ int sectorsize, int alignment, long long *size) -+{ -+ struct cow_header_v3 *header; -+ unsigned long modtime; -+ int err; -+ -+ err = cow_seek_file(fd, 0); -+ if(err < 0){ -+ cow_printf("write_cow_header - lseek failed, err = %d\n", -err); -+ goto out; -+ } -+ -+ err = -ENOMEM; -+ header = cow_malloc(sizeof(*header)); -+ if(header == NULL){ -+ cow_printf("Failed to allocate COW V3 header\n"); -+ goto out; -+ } -+ header->magic = htonl(COW_MAGIC); -+ header->version = htonl(COW_VERSION); -+ -+ err = -EINVAL; -+ if(strlen(backing_file) > sizeof(header->backing_file) - 1){ -+ cow_printf("Backing file name \"%s\" is too long - names are " -+ "limited to %d characters\n", backing_file, -+ sizeof(header->backing_file) - 1); -+ goto out_free; -+ } -+ -+ if(absolutize(header->backing_file, sizeof(header->backing_file), -+ backing_file)) -+ goto out_free; -+ -+ err = os_file_modtime(header->backing_file, &modtime); -+ if(err < 0){ -+ cow_printf("Backing file '%s' mtime request failed, " -+ "err = %d\n", header->backing_file, -err); -+ goto out_free; -+ } -+ -+ err = cow_file_size(header->backing_file, size); -+ if(err < 0){ -+ cow_printf("Couldn't get size of backing file '%s', " -+ "err = %d\n", header->backing_file, -err); -+ goto out_free; -+ } -+ -+ header->mtime = htonl(modtime); -+ header->size = htonll(*size); -+ header->sectorsize = htonl(sectorsize); -+ header->alignment = htonl(alignment); -+ header->cow_format = COW_BITMAP; -+ -+ err = os_write_file(fd, header, sizeof(*header)); -+ if(err != sizeof(*header)){ -+ 
cow_printf("Write of header to new COW file '%s' failed, " -+ "err = %d\n", cow_file, -err); -+ goto out_free; -+ } -+ err = 0; -+ out_free: -+ cow_free(header); -+ out: -+ return(err); -+} -+ -+int file_reader(__u64 offset, char *buf, int len, void *arg) -+{ -+ int fd = *((int *) arg); -+ -+ return(pread(fd, buf, len, offset)); -+} -+ -+/* XXX Need to sanity-check the values read from the header */ -+ -+int read_cow_header(int (*reader)(__u64, char *, int, void *), void *arg, -+ __u32 *version_out, char **backing_file_out, -+ time_t *mtime_out, __u64 *size_out, -+ int *sectorsize_out, __u32 *align_out, -+ int *bitmap_offset_out) -+{ -+ union cow_header *header; -+ char *file; -+ int err, n; -+ unsigned long version, magic; -+ -+ header = cow_malloc(sizeof(*header)); -+ if(header == NULL){ -+ cow_printf("read_cow_header - Failed to allocate header\n"); -+ return(-ENOMEM); -+ } -+ err = -EINVAL; -+ n = (*reader)(0, (char *) header, sizeof(*header), arg); -+ if(n < offsetof(typeof(header->v1), backing_file)){ -+ cow_printf("read_cow_header - short header\n"); -+ goto out; -+ } -+ -+ magic = header->v1.magic; -+ if(magic == COW_MAGIC) { -+ version = header->v1.version; -+ } -+ else if(magic == ntohl(COW_MAGIC)){ -+ version = ntohl(header->v1.version); -+ } -+ /* No error printed because the non-COW case comes through here */ -+ else goto out; -+ -+ *version_out = version; -+ -+ if(version == 1){ -+ if(n < sizeof(header->v1)){ -+ cow_printf("read_cow_header - failed to read V1 " -+ "header\n"); -+ goto out; -+ } -+ *mtime_out = header->v1.mtime; -+ *size_out = header->v1.size; -+ *sectorsize_out = header->v1.sectorsize; -+ *bitmap_offset_out = sizeof(header->v1); -+ *align_out = *sectorsize_out; -+ file = header->v1.backing_file; -+ } -+ else if(version == 2){ -+ if(n < sizeof(header->v2)){ -+ cow_printf("read_cow_header - failed to read V2 " -+ "header\n"); -+ goto out; -+ } -+ *mtime_out = ntohl(header->v2.mtime); -+ *size_out = ntohll(header->v2.size); -+ 
*sectorsize_out = ntohl(header->v2.sectorsize); -+ *bitmap_offset_out = sizeof(header->v2); -+ *align_out = *sectorsize_out; -+ file = header->v2.backing_file; -+ } -+ else if(version == 3){ -+ if(n < sizeof(header->v3)){ -+ cow_printf("read_cow_header - failed to read V2 " -+ "header\n"); -+ goto out; -+ } -+ *mtime_out = ntohl(header->v3.mtime); -+ *size_out = ntohll(header->v3.size); -+ *sectorsize_out = ntohl(header->v3.sectorsize); -+ *align_out = ntohl(header->v3.alignment); -+ *bitmap_offset_out = ROUND_UP(sizeof(header->v3), *align_out); -+ file = header->v3.backing_file; -+ } -+ else { -+ cow_printf("read_cow_header - invalid COW version\n"); -+ goto out; -+ } -+ err = -ENOMEM; -+ *backing_file_out = cow_strdup(file); -+ if(*backing_file_out == NULL){ -+ cow_printf("read_cow_header - failed to allocate backing " -+ "file\n"); -+ goto out; -+ } -+ err = 0; -+ out: -+ cow_free(header); -+ return(err); -+} -+ -+int init_cow_file(int fd, char *cow_file, char *backing_file, int sectorsize, -+ int alignment, int *bitmap_offset_out, -+ unsigned long *bitmap_len_out, int *data_offset_out) -+{ -+ __u64 size, offset; -+ char zero = 0; -+ int err; -+ -+ err = write_cow_header(cow_file, fd, backing_file, sectorsize, -+ alignment, &size); -+ if(err) -+ goto out; -+ -+ *bitmap_offset_out = ROUND_UP(sizeof(struct cow_header_v3), alignment); -+ cow_sizes(COW_VERSION, size, sectorsize, alignment, *bitmap_offset_out, -+ bitmap_len_out, data_offset_out); -+ -+ offset = *data_offset_out + size - sizeof(zero); -+ err = cow_seek_file(fd, offset); -+ if(err < 0){ -+ cow_printf("cow bitmap lseek failed : err = %d\n", -err); -+ goto out; -+ } -+ -+ /* does not really matter how much we write it is just to set EOF -+ * this also sets the entire COW bitmap -+ * to zero without having to allocate it -+ */ -+ err = cow_write_file(fd, &zero, sizeof(zero)); -+ if(err != sizeof(zero)){ -+ cow_printf("Write of bitmap to new COW file '%s' failed, " -+ "err = %d\n", cow_file, -err); -+ err 
= -EINVAL; -+ goto out; -+ } -+ -+ return(0); -+ -+ out: -+ return(err); -+} -+ -+/* -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -Index: uml-2.6.7/arch/um/drivers/chan_kern.c -=================================================================== ---- uml-2.6.7.orig/arch/um/drivers/chan_kern.c 2004-07-16 19:37:43.028483752 +0300 -+++ uml-2.6.7/arch/um/drivers/chan_kern.c 2004-07-16 19:47:24.392103048 +0300 -@@ -8,6 +8,7 @@ - #include - #include - #include -+#include - #include - #include - #include "chan_kern.h" -@@ -16,6 +17,7 @@ - #include "irq_user.h" - #include "sigio.h" - #include "line.h" -+#include "os.h" - - static void *not_configged_init(char *str, int device, struct chan_opts *opts) - { -@@ -86,6 +88,52 @@ - .winch = 0, - }; - -+void generic_close(int fd, void *unused) -+{ -+ os_close_file(fd); -+} -+ -+int generic_read(int fd, char *c_out, void *unused) -+{ -+ int n; -+ -+ n = os_read_file(fd, c_out, sizeof(*c_out)); -+ -+ if(n == -EAGAIN) -+ return(0); -+ else if(n == 0) -+ return(-EIO); -+ return(n); -+} -+ -+int generic_write(int fd, const char *buf, int n, void *unused) -+{ -+ return(os_write_file(fd, buf, n)); -+} -+ -+int generic_window_size(int fd, void *unused, unsigned short *rows_out, -+ unsigned short *cols_out) -+{ -+ int rows, cols; -+ int ret; -+ -+ ret = os_window_size(fd, &rows, &cols); -+ if(ret < 0) -+ return(ret); -+ -+ ret = ((*rows_out != rows) || (*cols_out != cols)); -+ -+ *rows_out = rows; -+ *cols_out = cols; -+ -+ return(ret); -+} -+ -+void generic_free(void *data) -+{ -+ kfree(data); -+} -+ - static void tty_receive_char(struct tty_struct *tty, char ch) - { - if(tty == NULL) return; -@@ -265,6 +313,11 @@ - { - int n = 0; - -+ if(chan == NULL){ -+ CONFIG_CHUNK(str, size, n, "none", 1); -+ return(n); -+ } -+ - CONFIG_CHUNK(str, size, n, chan->ops->type, 0); - - if(chan->dev == NULL){ -@@ -420,7 +473,8 @@ - INIT_LIST_HEAD(chans); - 
} - -- if((out = strchr(str, ',')) != NULL){ -+ out = strchr(str, ','); -+ if(out != NULL){ - in = str; - *out = '\0'; - out++; -@@ -475,12 +529,15 @@ - goto out; - } - err = chan->ops->read(chan->fd, &c, chan->data); -- if(err > 0) tty_receive_char(tty, c); -+ if(err > 0) -+ tty_receive_char(tty, c); - } while(err > 0); -+ - if(err == 0) reactivate_fd(chan->fd, irq); - if(err == -EIO){ - if(chan->primary){ -- if(tty != NULL) tty_hangup(tty); -+ if(tty != NULL) -+ tty_hangup(tty); - line_disable(dev, irq); - close_chan(chans); - free_chan(chans); -Index: uml-2.6.7/arch/um/drivers/cow_kern.c -=================================================================== ---- uml-2.6.7.orig/arch/um/drivers/cow_kern.c 2004-07-16 19:47:23.608222216 +0300 -+++ uml-2.6.7/arch/um/drivers/cow_kern.c 2004-07-16 19:47:23.679211424 +0300 -@@ -0,0 +1,630 @@ -+#define COW_MAJOR 60 -+#define MAJOR_NR COW_MAJOR -+ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include "2_5compat.h" -+#include "cow.h" -+#include "ubd_user.h" -+ -+#define COW_SHIFT 4 -+ -+struct cow { -+ int count; -+ char *cow_path; -+ dev_t cow_dev; -+ struct block_device *cow_bdev; -+ char *backing_path; -+ dev_t backing_dev; -+ struct block_device *backing_bdev; -+ int sectorsize; -+ unsigned long *bitmap; -+ unsigned long bitmap_len; -+ int bitmap_offset; -+ int data_offset; -+ devfs_handle_t devfs; -+ struct semaphore sem; -+ struct semaphore io_sem; -+ atomic_t working; -+ spinlock_t io_lock; -+ struct buffer_head *bh; -+ struct buffer_head *bhtail; -+ void *end_io; -+}; -+ -+#define DEFAULT_COW { \ -+ .count = 0, \ -+ .cow_path = NULL, \ -+ .cow_dev = 0, \ -+ .backing_path = NULL, \ -+ .backing_dev = 0, \ -+ .bitmap = NULL, \ -+ .bitmap_len = 0, \ -+ .bitmap_offset = 0, \ -+ .data_offset = 0, \ -+ .devfs = NULL, \ -+ .working = ATOMIC_INIT(0), \ -+ .io_lock = SPIN_LOCK_UNLOCKED, \ -+} -+ -+#define MAX_DEV (8) -+#define MAX_MINOR (MAX_DEV << 
COW_SHIFT) -+ -+struct cow cow_dev[MAX_DEV] = { [ 0 ... MAX_DEV - 1 ] = DEFAULT_COW }; -+ -+/* Not modified by this driver */ -+static int blk_sizes[MAX_MINOR] = { [ 0 ... MAX_MINOR - 1 ] = BLOCK_SIZE }; -+static int hardsect_sizes[MAX_MINOR] = { [ 0 ... MAX_MINOR - 1 ] = 512 }; -+ -+/* Protected by cow_lock */ -+static int sizes[MAX_MINOR] = { [ 0 ... MAX_MINOR - 1 ] = 0 }; -+ -+static struct hd_struct cow_part[MAX_MINOR] = -+ { [ 0 ... MAX_MINOR - 1 ] = { 0, 0, 0 } }; -+ -+/* Protected by io_request_lock */ -+static request_queue_t *cow_queue; -+ -+static int cow_open(struct inode *inode, struct file *filp); -+static int cow_release(struct inode * inode, struct file * file); -+static int cow_ioctl(struct inode * inode, struct file * file, -+ unsigned int cmd, unsigned long arg); -+static int cow_revalidate(kdev_t rdev); -+ -+static struct block_device_operations cow_blops = { -+ .open = cow_open, -+ .release = cow_release, -+ .ioctl = cow_ioctl, -+ .revalidate = cow_revalidate, -+}; -+ -+/* Initialized in an initcall, and unchanged thereafter */ -+devfs_handle_t cow_dir_handle; -+ -+#define INIT_GENDISK(maj, name, parts, shift, bsizes, max, blops) \ -+{ \ -+ .major = maj, \ -+ .major_name = name, \ -+ .minor_shift = shift, \ -+ .max_p = 1 << shift, \ -+ .part = parts, \ -+ .sizes = bsizes, \ -+ .nr_real = max, \ -+ .real_devices = NULL, \ -+ .next = NULL, \ -+ .fops = blops, \ -+ .de_arr = NULL, \ -+ .flags = 0 \ -+} -+ -+static spinlock_t cow_lock = SPIN_LOCK_UNLOCKED; -+ -+static struct gendisk cow_gendisk = INIT_GENDISK(MAJOR_NR, "cow", cow_part, -+ COW_SHIFT, sizes, MAX_DEV, -+ &cow_blops); -+ -+static int cow_add(int n) -+{ -+ struct cow *dev = &cow_dev[n]; -+ char name[sizeof("nnnnnn\0")]; -+ int err = -ENODEV; -+ -+ if(dev->cow_path == NULL) -+ goto out; -+ -+ sprintf(name, "%d", n); -+ dev->devfs = devfs_register(cow_dir_handle, name, DEVFS_FL_REMOVABLE, -+ MAJOR_NR, n << COW_SHIFT, S_IFBLK | -+ S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP, -+ &cow_blops, NULL); 
-+ -+ init_MUTEX_LOCKED(&dev->sem); -+ init_MUTEX(&dev->io_sem); -+ -+ return(0); -+ -+ out: -+ return(err); -+} -+ -+/* -+ * Add buffer_head to back of pending list -+ */ -+static void cow_add_bh(struct cow *cow, struct buffer_head *bh) -+{ -+ unsigned long flags; -+ -+ spin_lock_irqsave(&cow->io_lock, flags); -+ if(cow->bhtail != NULL){ -+ cow->bhtail->b_reqnext = bh; -+ cow->bhtail = bh; -+ } -+ else { -+ cow->bh = bh; -+ cow->bhtail = bh; -+ } -+ spin_unlock_irqrestore(&cow->io_lock, flags); -+} -+ -+/* -+* Grab first pending buffer -+*/ -+static struct buffer_head *cow_get_bh(struct cow *cow) -+{ -+ struct buffer_head *bh; -+ -+ spin_lock_irq(&cow->io_lock); -+ bh = cow->bh; -+ if(bh != NULL){ -+ if(bh == cow->bhtail) -+ cow->bhtail = NULL; -+ cow->bh = bh->b_reqnext; -+ bh->b_reqnext = NULL; -+ } -+ spin_unlock_irq(&cow->io_lock); -+ -+ return(bh); -+} -+ -+static void cow_handle_bh(struct cow *cow, struct buffer_head *bh, -+ struct buffer_head **cow_bh, int ncow_bh) -+{ -+ int i; -+ -+ if(ncow_bh > 0) -+ ll_rw_block(WRITE, ncow_bh, cow_bh); -+ -+ for(i = 0; i < ncow_bh ; i++){ -+ wait_on_buffer(cow_bh[i]); -+ brelse(cow_bh[i]); -+ } -+ -+ ll_rw_block(WRITE, 1, &bh); -+ brelse(bh); -+} -+ -+static struct buffer_head *cow_new_bh(struct cow *dev, int sector) -+{ -+ struct buffer_head *bh; -+ -+ sector = (dev->bitmap_offset + sector / 8) / dev->sectorsize; -+ bh = getblk(dev->cow_dev, sector, dev->sectorsize); -+ memcpy(bh->b_data, dev->bitmap + sector / (8 * sizeof(dev->bitmap[0])), -+ dev->sectorsize); -+ return(bh); -+} -+ -+/* Copied from loop.c, needed to avoid deadlocking in make_request. 
*/ -+ -+static int cow_thread(void *data) -+{ -+ struct cow *dev = data; -+ struct buffer_head *bh; -+ -+ daemonize(); -+ exit_files(current); -+ -+ sprintf(current->comm, "cow%d", dev - cow_dev); -+ -+ spin_lock_irq(¤t->sigmask_lock); -+ sigfillset(¤t->blocked); -+ flush_signals(current); -+ spin_unlock_irq(¤t->sigmask_lock); -+ -+ atomic_inc(&dev->working); -+ -+ current->policy = SCHED_OTHER; -+ current->nice = -20; -+ -+ current->flags |= PF_NOIO; -+ -+ /* -+ * up sem, we are running -+ */ -+ up(&dev->sem); -+ -+ for(;;){ -+ int start, len, nbh, i, update_bitmap = 0; -+ struct buffer_head *cow_bh[2]; -+ -+ down_interruptible(&dev->io_sem); -+ /* -+ * could be upped because of tear-down, not because of -+ * pending work -+ */ -+ if(!atomic_read(&dev->working)) -+ break; -+ -+ bh = cow_get_bh(dev); -+ if(bh == NULL){ -+ printk(KERN_ERR "cow: missing bh\n"); -+ continue; -+ } -+ -+ start = bh->b_blocknr * bh->b_size / dev->sectorsize; -+ len = bh->b_size / dev->sectorsize; -+ for(i = 0; i < len ; i++){ -+ if(ubd_test_bit(start + i, -+ (unsigned char *) dev->bitmap)) -+ continue; -+ -+ update_bitmap = 1; -+ ubd_set_bit(start + i, (unsigned char *) dev->bitmap); -+ } -+ -+ cow_bh[0] = NULL; -+ cow_bh[1] = NULL; -+ nbh = 0; -+ if(update_bitmap){ -+ cow_bh[0] = cow_new_bh(dev, start); -+ nbh++; -+ if(start / dev->sectorsize != -+ (start + len) / dev->sectorsize){ -+ cow_bh[1] = cow_new_bh(dev, start + len); -+ nbh++; -+ } -+ } -+ -+ bh->b_dev = dev->cow_dev; -+ bh->b_blocknr += dev->data_offset / dev->sectorsize; -+ -+ cow_handle_bh(dev, bh, cow_bh, nbh); -+ -+ /* -+ * upped both for pending work and tear-down, lo_pending -+ * will hit zero then -+ */ -+ if(atomic_dec_and_test(&dev->working)) -+ break; -+ } -+ -+ up(&dev->sem); -+ return(0); -+} -+ -+static int cow_make_request(request_queue_t *q, int rw, struct buffer_head *bh) -+{ -+ struct cow *dev; -+ int n, minor; -+ -+ minor = MINOR(bh->b_rdev); -+ n = minor >> COW_SHIFT; -+ dev = &cow_dev[n]; -+ -+ dev->end_io 
= NULL; -+ if(ubd_test_bit(bh->b_rsector, (unsigned char *) dev->bitmap)){ -+ bh->b_rdev = dev->cow_dev; -+ bh->b_rsector += dev->data_offset / dev->sectorsize; -+ } -+ else if(rw == WRITE){ -+ bh->b_dev = dev->cow_dev; -+ bh->b_blocknr += dev->data_offset / dev->sectorsize; -+ -+ cow_add_bh(dev, bh); -+ up(&dev->io_sem); -+ return(0); -+ } -+ else { -+ bh->b_rdev = dev->backing_dev; -+ } -+ -+ return(1); -+} -+ -+int cow_init(void) -+{ -+ int i; -+ -+ cow_dir_handle = devfs_mk_dir (NULL, "cow", NULL); -+ if (devfs_register_blkdev(MAJOR_NR, "cow", &cow_blops)) { -+ printk(KERN_ERR "cow: unable to get major %d\n", MAJOR_NR); -+ return -1; -+ } -+ read_ahead[MAJOR_NR] = 8; /* 8 sector (4kB) read-ahead */ -+ blksize_size[MAJOR_NR] = blk_sizes; -+ blk_size[MAJOR_NR] = sizes; -+ INIT_HARDSECT(hardsect_size, MAJOR_NR, hardsect_sizes); -+ -+ cow_queue = BLK_DEFAULT_QUEUE(MAJOR_NR); -+ blk_init_queue(cow_queue, NULL); -+ INIT_ELV(cow_queue, &cow_queue->elevator); -+ blk_queue_make_request(cow_queue, cow_make_request); -+ -+ add_gendisk(&cow_gendisk); -+ -+ for(i=0;i 0){ -+ n = (left > blocksize) ? blocksize : left; -+ -+ bh = bread(dev, block, (n < 512) ? 
512 : n); -+ if(bh == NULL) -+ return(-EIO); -+ -+ n -= offset; -+ memcpy(&buf[cur], bh->b_data + offset, n); -+ block++; -+ left -= n; -+ cur += n; -+ offset = 0; -+ brelse(bh); -+ } -+ -+ return(count); -+} -+ -+static int cow_open(struct inode *inode, struct file *filp) -+{ -+ int (*dev_ioctl)(struct inode *, struct file *, unsigned int, -+ unsigned long); -+ mm_segment_t fs; -+ struct cow *dev; -+ __u64 size; -+ __u32 version, align; -+ time_t mtime; -+ char *backing_file; -+ int n, offset, err = 0; -+ -+ n = DEVICE_NR(inode->i_rdev); -+ if(n >= MAX_DEV) -+ return(-ENODEV); -+ dev = &cow_dev[n]; -+ offset = n << COW_SHIFT; -+ -+ spin_lock(&cow_lock); -+ -+ if(dev->count == 0){ -+ dev->cow_dev = name_to_kdev_t(dev->cow_path); -+ if(dev->cow_dev == 0){ -+ printk(KERN_ERR "cow_open - name_to_kdev_t(\"%s\") " -+ "failed\n", dev->cow_path); -+ err = -ENODEV; -+ } -+ -+ dev->backing_dev = name_to_kdev_t(dev->backing_path); -+ if(dev->backing_dev == 0){ -+ printk(KERN_ERR "cow_open - name_to_kdev_t(\"%s\") " -+ "failed\n", dev->backing_path); -+ err = -ENODEV; -+ } -+ -+ if(err) -+ goto out; -+ -+ dev->cow_bdev = bdget(dev->cow_dev); -+ if(dev->cow_bdev == NULL){ -+ printk(KERN_ERR "cow_open - bdget(\"%s\") failed\n", -+ dev->cow_path); -+ err = -ENOMEM; -+ } -+ dev->backing_bdev = bdget(dev->backing_dev); -+ if(dev->backing_bdev == NULL){ -+ printk(KERN_ERR "cow_open - bdget(\"%s\") failed\n", -+ dev->backing_path); -+ err = -ENOMEM; -+ } -+ -+ if(err) -+ goto out; -+ -+ err = blkdev_get(dev->cow_bdev, FMODE_READ|FMODE_WRITE, 0, -+ BDEV_RAW); -+ if(err){ -+ printk("cow_open - blkdev_get of COW device failed, " -+ "error = %d\n", err); -+ goto out; -+ } -+ -+ err = blkdev_get(dev->backing_bdev, FMODE_READ, 0, BDEV_RAW); -+ if(err){ -+ printk("cow_open - blkdev_get of backing device " -+ "failed, error = %d\n", err); -+ goto out; -+ } -+ -+ err = read_cow_header(reader, &dev->cow_dev, &version, -+ &backing_file, &mtime, &size, -+ &dev->sectorsize, &align, -+ 
&dev->bitmap_offset); -+ if(err){ -+ printk(KERN_ERR "cow_open - read_cow_header failed, " -+ "err = %d\n", err); -+ goto out; -+ } -+ -+ cow_sizes(version, size, dev->sectorsize, align, -+ dev->bitmap_offset, &dev->bitmap_len, -+ &dev->data_offset); -+ dev->bitmap = (void *) vmalloc(dev->bitmap_len); -+ if(dev->bitmap == NULL){ -+ err = -ENOMEM; -+ printk(KERN_ERR "Failed to vmalloc COW bitmap\n"); -+ goto out; -+ } -+ flush_tlb_kernel_vm(); -+ -+ err = reader(dev->bitmap_offset, (char *) dev->bitmap, -+ dev->bitmap_len, &dev->cow_dev); -+ if(err < 0){ -+ printk(KERN_ERR "Failed to read COW bitmap\n"); -+ vfree(dev->bitmap); -+ goto out; -+ } -+ -+ dev_ioctl = dev->backing_bdev->bd_op->ioctl; -+ fs = get_fs(); -+ set_fs(KERNEL_DS); -+ err = (*dev_ioctl)(inode, filp, BLKGETSIZE, -+ (unsigned long) &sizes[offset]); -+ set_fs(fs); -+ if(err){ -+ printk(KERN_ERR "cow_open - BLKGETSIZE failed, " -+ "error = %d\n", err); -+ goto out; -+ } -+ -+ kernel_thread(cow_thread, dev, -+ CLONE_FS | CLONE_FILES | CLONE_SIGHAND); -+ down(&dev->sem); -+ } -+ dev->count++; -+ out: -+ spin_unlock(&cow_lock); -+ return(err); -+} -+ -+static int cow_release(struct inode * inode, struct file * file) -+{ -+ struct cow *dev; -+ int n, err; -+ -+ n = DEVICE_NR(inode->i_rdev); -+ if(n >= MAX_DEV) -+ return(-ENODEV); -+ dev = &cow_dev[n]; -+ -+ spin_lock(&cow_lock); -+ -+ if(--dev->count > 0) -+ goto out; -+ -+ err = blkdev_put(dev->cow_bdev, BDEV_RAW); -+ if(err) -+ printk("cow_release - blkdev_put of cow device failed, " -+ "error = %d\n", err); -+ bdput(dev->cow_bdev); -+ dev->cow_bdev = 0; -+ -+ err = blkdev_put(dev->backing_bdev, BDEV_RAW); -+ if(err) -+ printk("cow_release - blkdev_put of backing device failed, " -+ "error = %d\n", err); -+ bdput(dev->backing_bdev); -+ dev->backing_bdev = 0; -+ -+ out: -+ spin_unlock(&cow_lock); -+ return(0); -+} -+ -+static int cow_ioctl(struct inode * inode, struct file * file, -+ unsigned int cmd, unsigned long arg) -+{ -+ struct cow *dev; -+ int 
(*dev_ioctl)(struct inode *, struct file *, unsigned int, -+ unsigned long); -+ int n; -+ -+ n = DEVICE_NR(inode->i_rdev); -+ if(n >= MAX_DEV) -+ return(-ENODEV); -+ dev = &cow_dev[n]; -+ -+ dev_ioctl = dev->backing_bdev->bd_op->ioctl; -+ return((*dev_ioctl)(inode, file, cmd, arg)); -+} -+ -+static int cow_revalidate(kdev_t rdev) -+{ -+ printk(KERN_ERR "Need to implement cow_revalidate\n"); -+ return(0); -+} -+ -+static int parse_unit(char **ptr) -+{ -+ char *str = *ptr, *end; -+ int n = -1; -+ -+ if(isdigit(*str)) { -+ n = simple_strtoul(str, &end, 0); -+ if(end == str) -+ return(-1); -+ *ptr = end; -+ } -+ else if (('a' <= *str) && (*str <= 'h')) { -+ n = *str - 'a'; -+ str++; -+ *ptr = str; -+ } -+ return(n); -+} -+ -+static int cow_setup(char *str) -+{ -+ struct cow *dev; -+ char *cow_name, *backing_name; -+ int unit; -+ -+ unit = parse_unit(&str); -+ if(unit < 0){ -+ printk(KERN_ERR "cow_setup - Couldn't parse unit number\n"); -+ return(1); -+ } -+ -+ if(*str != '='){ -+ printk(KERN_ERR "cow_setup - Missing '=' after unit " -+ "number\n"); -+ return(1); -+ } -+ str++; -+ -+ cow_name = str; -+ backing_name = strchr(str, ','); -+ if(backing_name == NULL){ -+ printk(KERN_ERR "cow_setup - missing backing device name\n"); -+ return(0); -+ } -+ *backing_name = '\0'; -+ backing_name++; -+ -+ spin_lock(&cow_lock); -+ -+ dev = &cow_dev[unit]; -+ dev->cow_path = cow_name; -+ dev->backing_path = backing_name; -+ -+ spin_unlock(&cow_lock); -+ return(0); -+} -+ -+__setup("cow", cow_setup); -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. 
-+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -Index: uml-2.6.7/arch/um/main.c -=================================================================== ---- uml-2.6.7.orig/arch/um/main.c 2004-07-16 19:37:17.425376016 +0300 -+++ uml-2.6.7/arch/um/main.c 2004-07-16 19:47:23.762198808 +0300 -@@ -8,6 +8,7 @@ - #include - #include - #include -+#include - #include - #include - #include -@@ -123,12 +124,14 @@ - - set_stklim(); - -- if((new_argv = malloc((argc + 1) * sizeof(char *))) == NULL){ -+ new_argv = malloc((argc + 1) * sizeof(char *)); -+ if(new_argv == NULL){ - perror("Mallocing argv"); - exit(1); - } - for(i=0;i - #include "user_util.h" - #include "uml_uaccess.h" -+#include "task.h" -+#include "kern_util.h" - - int __do_copy_from_user(void *to, const void *from, int n, - void **fault_addr, void **fault_catcher) - { -+ struct tt_regs save = TASK_REGS(get_current())->tt; - unsigned long fault; - int faulted; - - fault = __do_user_copy(to, from, n, fault_addr, fault_catcher, - __do_copy, &faulted); -+ TASK_REGS(get_current())->tt = save; -+ - if(!faulted) return(0); - else return(n - (fault - (unsigned long) from)); - } -@@ -29,11 +34,14 @@ - int __do_strncpy_from_user(char *dst, const char *src, unsigned long count, - void **fault_addr, void **fault_catcher) - { -+ struct tt_regs save = TASK_REGS(get_current())->tt; - unsigned long fault; - int faulted; - - fault = __do_user_copy(dst, src, count, fault_addr, fault_catcher, - __do_strncpy, &faulted); -+ TASK_REGS(get_current())->tt = save; -+ - if(!faulted) return(strlen(dst)); - else return(-1); - } -@@ -46,11 +54,14 @@ - int __do_clear_user(void *mem, unsigned long len, - void **fault_addr, void **fault_catcher) - { -+ struct tt_regs save = TASK_REGS(get_current())->tt; - unsigned long fault; - int faulted; - - fault = __do_user_copy(mem, NULL, len, fault_addr, fault_catcher, - __do_clear, &faulted); -+ 
TASK_REGS(get_current())->tt = save; -+ - if(!faulted) return(0); - else return(len - (fault - (unsigned long) mem)); - } -@@ -58,19 +69,20 @@ - int __do_strnlen_user(const char *str, unsigned long n, - void **fault_addr, void **fault_catcher) - { -+ struct tt_regs save = TASK_REGS(get_current())->tt; - int ret; - unsigned long *faddrp = (unsigned long *)fault_addr; - jmp_buf jbuf; - - *fault_catcher = &jbuf; -- if(setjmp(jbuf) == 0){ -+ if(sigsetjmp(jbuf, 1) == 0) - ret = strlen(str) + 1; -- } -- else { -- ret = *faddrp - (unsigned long) str; -- } -+ else ret = *faddrp - (unsigned long) str; -+ - *fault_addr = NULL; - *fault_catcher = NULL; -+ -+ TASK_REGS(get_current())->tt = save; - return ret; - } - -Index: uml-2.6.7/arch/um/kernel/syscall_kern.c -=================================================================== ---- uml-2.6.7.orig/arch/um/kernel/syscall_kern.c 2004-07-16 19:37:08.748695072 +0300 -+++ uml-2.6.7/arch/um/kernel/syscall_kern.c 2004-07-16 19:47:23.737202608 +0300 -@@ -1,5 +1,5 @@ - /* -- * Copyright (C) 2000 Jeff Dike (jdike@karaya.com) -+ * Copyright (C) 2000 - 2003 Jeff Dike (jdike@addtoit.com) - * Licensed under the GPL - */ - -@@ -36,32 +36,34 @@ - - long sys_fork(void) - { -- struct task_struct *p; -+ long ret; - - current->thread.forking = 1; -- p = do_fork(SIGCHLD, 0, NULL, 0, NULL, NULL); -+ ret = do_fork(SIGCHLD, 0, NULL, 0, NULL, NULL); - current->thread.forking = 0; -- return(IS_ERR(p) ? PTR_ERR(p) : p->pid); -+ return(ret); - } - --long sys_clone(unsigned long clone_flags, unsigned long newsp) -+long sys_clone(unsigned long clone_flags, unsigned long newsp, -+ int *parent_tid, int *child_tid) - { -- struct task_struct *p; -+ long ret; - - current->thread.forking = 1; -- p = do_fork(clone_flags, newsp, NULL, 0, NULL, NULL); -+ ret = do_fork(clone_flags, newsp, NULL, 0, parent_tid, child_tid); - current->thread.forking = 0; -- return(IS_ERR(p) ? 
PTR_ERR(p) : p->pid); -+ return(ret); - } - - long sys_vfork(void) - { -- struct task_struct *p; -+ long ret; - - current->thread.forking = 1; -- p = do_fork(CLONE_VFORK | CLONE_VM | SIGCHLD, 0, NULL, 0, NULL, NULL); -+ ret = do_fork(CLONE_VFORK | CLONE_VM | SIGCHLD, 0, NULL, 0, NULL, -+ NULL); - current->thread.forking = 0; -- return(IS_ERR(p) ? PTR_ERR(p) : p->pid); -+ return(ret); - } - - /* common code for old and new mmaps */ -@@ -136,43 +138,12 @@ - - error = do_pipe(fd); - if (!error) { -- if (copy_to_user(fildes, fd, 2*sizeof(int))) -+ if (copy_to_user(fildes, fd, sizeof(fd))) - error = -EFAULT; - } - return error; - } - --int sys_sigaction(int sig, const struct old_sigaction *act, -- struct old_sigaction *oact) --{ -- struct k_sigaction new_ka, old_ka; -- int ret; -- -- if (act) { -- old_sigset_t mask; -- if (verify_area(VERIFY_READ, act, sizeof(*act)) || -- __get_user(new_ka.sa.sa_handler, &act->sa_handler) || -- __get_user(new_ka.sa.sa_restorer, &act->sa_restorer)) -- return -EFAULT; -- __get_user(new_ka.sa.sa_flags, &act->sa_flags); -- __get_user(mask, &act->sa_mask); -- siginitset(&new_ka.sa.sa_mask, mask); -- } -- -- ret = do_sigaction(sig, act ? &new_ka : NULL, oact ? &old_ka : NULL); -- -- if (!ret && oact) { -- if (verify_area(VERIFY_WRITE, oact, sizeof(*oact)) || -- __put_user(old_ka.sa.sa_handler, &oact->sa_handler) || -- __put_user(old_ka.sa.sa_restorer, &oact->sa_restorer)) -- return -EFAULT; -- __put_user(old_ka.sa.sa_flags, &oact->sa_flags); -- __put_user(old_ka.sa.sa_mask.sig[0], &oact->sa_mask); -- } -- -- return ret; --} -- - /* - * sys_ipc() is the de-multiplexer for the SysV IPC calls.. 
- * -@@ -254,7 +225,7 @@ - return sys_shmctl (first, second, - (struct shmid_ds *) ptr); - default: -- return -EINVAL; -+ return -ENOSYS; - } - } - -@@ -303,11 +274,6 @@ - return error; - } - --int sys_sigaltstack(const stack_t *uss, stack_t *uoss) --{ -- return(do_sigaltstack(uss, uoss, PT_REGS_SP(¤t->thread.regs))); --} -- - long execute_syscall(void *r) - { - return(CHOOSE_MODE_PROC(execute_syscall_tt, execute_syscall_skas, r)); -Index: uml-2.6.7/arch/um/kernel/skas/util/mk_ptregs.c -=================================================================== ---- uml-2.6.7.orig/arch/um/kernel/skas/util/mk_ptregs.c 2004-07-16 19:36:30.635489160 +0300 -+++ uml-2.6.7/arch/um/kernel/skas/util/mk_ptregs.c 2004-07-16 19:47:23.736202760 +0300 -@@ -1,3 +1,4 @@ -+#include - #include - #include - -Index: uml-2.6.7/arch/um/kernel/skas/Makefile -=================================================================== ---- uml-2.6.7.orig/arch/um/kernel/skas/Makefile 2004-07-16 19:36:22.285758512 +0300 -+++ uml-2.6.7/arch/um/kernel/skas/Makefile 2004-07-16 19:47:23.729203824 +0300 -@@ -5,20 +5,24 @@ - - obj-y = exec_kern.o exec_user.o mem.o mem_user.o mmu.o process.o \ - process_kern.o syscall_kern.o syscall_user.o time.o tlb.o trap_user.o \ -- sys-$(SUBARCH)/ -+ uaccess.o sys-$(SUBARCH)/ -+ -+host-progs := util/mk_ptregs -+clean-files := include/skas_ptregs.h - - USER_OBJS = $(filter %_user.o,$(obj-y)) process.o time.o - USER_OBJS := $(foreach file,$(USER_OBJS),$(obj)/$(file)) - --include/skas_ptregs.h : util/mk_ptregs -- util/mk_ptregs > $@ -- --util/mk_ptregs : -- $(MAKE) -C util -+$(TOPDIR)/arch/um/include/skas_ptregs.h : $(src)/util/mk_ptregs -+ @echo -n ' Generating $@' -+ @$< > $@.tmp -+ @if [ -r $@ ] && cmp -s $@ $@.tmp; then \ -+ echo ' (unchanged)'; \ -+ rm -f $@.tmp; \ -+ else \ -+ echo ' (updated)'; \ -+ mv -f $@.tmp $@; \ -+ fi - - $(USER_OBJS) : %.o: %.c - $(CC) $(CFLAGS_$(notdir $@)) $(USER_CFLAGS) -c -o $@ $< -- --clean : -- $(MAKE) -C util clean -- $(RM) -f 
include/skas_ptregs.h -Index: uml-2.6.7/include/asm-um/irq.h -=================================================================== ---- uml-2.6.7.orig/include/asm-um/irq.h 2004-07-16 19:37:43.396427816 +0300 -+++ uml-2.6.7/include/asm-um/irq.h 2004-07-16 19:47:23.790194552 +0300 -@@ -1,15 +1,6 @@ - #ifndef __UM_IRQ_H - #define __UM_IRQ_H - --/* The i386 irq.h has a struct task_struct in a prototype without including -- * sched.h. This forward declaration kills the resulting warning. -- */ --struct task_struct; -- --#include "asm/ptrace.h" -- --#undef NR_IRQS -- - #define TIMER_IRQ 0 - #define UMN_IRQ 1 - #define CONSOLE_IRQ 2 -@@ -28,13 +19,4 @@ - #define LAST_IRQ XTERM_IRQ - #define NR_IRQS (LAST_IRQ + 1) - --extern int um_request_irq(unsigned int irq, int fd, int type, -- void (*handler)(int, void *, struct pt_regs *), -- unsigned long irqflags, const char * devname, -- void *dev_id); -- --struct irqaction; --struct pt_regs; --int handle_IRQ_event(unsigned int, struct pt_regs *, struct irqaction *); -- - #endif -Index: uml-2.6.7/arch/um/drivers/line.c -=================================================================== ---- uml-2.6.7.orig/arch/um/drivers/line.c 2004-07-16 19:37:26.186044192 +0300 -+++ uml-2.6.7/arch/um/drivers/line.c 2004-07-16 19:47:23.685210512 +0300 -@@ -6,8 +6,8 @@ - #include "linux/sched.h" - #include "linux/slab.h" - #include "linux/list.h" -+#include "linux/interrupt.h" - #include "linux/devfs_fs_kernel.h" --#include "asm/irq.h" - #include "asm/uaccess.h" - #include "chan_kern.h" - #include "irq_user.h" -@@ -16,38 +16,55 @@ - #include "user_util.h" - #include "kern_util.h" - #include "os.h" -+#include "irq_kern.h" - - #define LINE_BUFSIZE 4096 - --void line_interrupt(int irq, void *data, struct pt_regs *unused) -+static irqreturn_t line_interrupt(int irq, void *data, struct pt_regs *unused) - { - struct line *dev = data; - - if(dev->count > 0) - chan_interrupt(&dev->chan_list, &dev->task, dev->tty, irq, - dev); -+ return IRQ_HANDLED; - } - 
--void line_timer_cb(void *arg) -+static void line_timer_cb(void *arg) - { - struct line *dev = arg; - - line_interrupt(dev->driver->read_irq, dev, NULL); - } - --static void buffer_data(struct line *line, const char *buf, int len) -+static int write_room(struct line *dev) - { -- int end; -+ int n; -+ -+ if(dev->buffer == NULL) return(LINE_BUFSIZE - 1); -+ -+ n = dev->head - dev->tail; -+ if(n <= 0) n = LINE_BUFSIZE + n; -+ return(n - 1); -+} -+ -+static int buffer_data(struct line *line, const char *buf, int len) -+{ -+ int end, room; - - if(line->buffer == NULL){ - line->buffer = kmalloc(LINE_BUFSIZE, GFP_ATOMIC); - if(line->buffer == NULL){ - printk("buffer_data - atomic allocation failed\n"); -- return; -+ return(0); - } - line->head = line->buffer; - line->tail = line->buffer; - } -+ -+ room = write_room(line); -+ len = (len > room) ? room : len; -+ - end = line->buffer + LINE_BUFSIZE - line->tail; - if(len < end){ - memcpy(line->tail, buf, len); -@@ -60,6 +77,8 @@ - memcpy(line->buffer, buf, len); - line->tail = line->buffer + len; - } -+ -+ return(len); - } - - static int flush_buffer(struct line *line) -@@ -95,7 +114,7 @@ - struct line *line; - char *new; - unsigned long flags; -- int n, err, i; -+ int n, err, i, ret = 0; - - if(tty->stopped) return 0; - -@@ -104,9 +123,13 @@ - if(new == NULL) - return(0); - n = copy_from_user(new, buf, len); -- if(n == len) -- return(-EFAULT); - buf = new; -+ if(n == len){ -+ len = -EFAULT; -+ goto out_free; -+ } -+ -+ len -= n; - } - - i = tty->index; -@@ -115,41 +138,50 @@ - down(&line->sem); - if(line->head != line->tail){ - local_irq_save(flags); -- buffer_data(line, buf, len); -+ ret += buffer_data(line, buf, len); - err = flush_buffer(line); - local_irq_restore(flags); - if(err <= 0) -- goto out; -+ goto out_up; - } - else { - n = write_chan(&line->chan_list, buf, len, - line->driver->write_irq); - if(n < 0){ -- len = n; -- goto out; -+ ret = n; -+ goto out_up; - } -- if(n < len) -- buffer_data(line, buf + n, len - 
n); -+ -+ len -= n; -+ ret += n; -+ if(len > 0) -+ ret += buffer_data(line, buf + n, len); - } -- out: -+ out_up: - up(&line->sem); -- return(len); -+ out_free: -+ if(from_user) -+ kfree(buf); -+ return(ret); - } - --void line_write_interrupt(int irq, void *data, struct pt_regs *unused) -+static irqreturn_t line_write_interrupt(int irq, void *data, -+ struct pt_regs *unused) - { - struct line *dev = data; - struct tty_struct *tty = dev->tty; - int err; - - err = flush_buffer(dev); -- if(err == 0) return; -+ if(err == 0) -+ return(IRQ_NONE); - else if(err < 0){ - dev->head = dev->buffer; - dev->tail = dev->buffer; - } - -- if(tty == NULL) return; -+ if(tty == NULL) -+ return(IRQ_NONE); - - if(test_bit(TTY_DO_WRITE_WAKEUP, &tty->flags) && - (tty->ldisc.write_wakeup != NULL)) -@@ -161,21 +193,9 @@ - * writes. - */ - -- if (waitqueue_active(&tty->write_wait)) -+ if(waitqueue_active(&tty->write_wait)) - wake_up_interruptible(&tty->write_wait); -- --} -- --int line_write_room(struct tty_struct *tty) --{ -- struct line *dev = tty->driver_data; -- int n; -- -- if(dev->buffer == NULL) return(LINE_BUFSIZE - 1); -- -- n = dev->head - dev->tail; -- if(n <= 0) n = LINE_BUFSIZE + n; -- return(n - 1); -+ return(IRQ_HANDLED); - } - - int line_setup_irq(int fd, int input, int output, void *data) -@@ -305,7 +325,7 @@ - if(*end != '='){ - printk(KERN_ERR "line_setup failed to parse \"%s\"\n", - init); -- return(1); -+ return(0); - } - init = end; - } -@@ -313,12 +333,12 @@ - if((n >= 0) && (n >= num)){ - printk("line_setup - %d out of range ((0 ... 
%d) allowed)\n", - n, num); -- return(1); -+ return(0); - } - else if(n >= 0){ - if(lines[n].count > 0){ - printk("line_setup - device %d is open\n", n); -- return(1); -+ return(0); - } - if(lines[n].init_pri <= INIT_ONE){ - lines[n].init_pri = INIT_ONE; -@@ -332,7 +352,7 @@ - else if(!all_allowed){ - printk("line_setup - can't configure all devices from " - "mconsole\n"); -- return(1); -+ return(0); - } - else { - for(i = 0; i < num; i++){ -@@ -346,7 +366,7 @@ - } - } - } -- return(0); -+ return(1); - } - - int line_config(struct line *lines, int num, char *str) -@@ -357,7 +377,7 @@ - printk("line_config - uml_strdup failed\n"); - return(-ENOMEM); - } -- return(line_setup(lines, num, new, 0)); -+ return(!line_setup(lines, num, new, 0)); - } - - int line_get_config(char *name, struct line *lines, int num, char *str, -@@ -369,7 +389,7 @@ - - dev = simple_strtoul(name, &end, 0); - if((*end != '\0') || (end == name)){ -- *error_out = "line_setup failed to parse device number"; -+ *error_out = "line_get_config failed to parse device number"; - return(0); - } - -@@ -379,15 +399,15 @@ - } - - line = &lines[dev]; -+ - down(&line->sem); -- - if(!line->valid) - CONFIG_CHUNK(str, size, n, "none", 1); - else if(line->count == 0) - CONFIG_CHUNK(str, size, n, line->init_str, 1); - else n = chan_config_string(&line->chan_list, str, size, error_out); -- - up(&line->sem); -+ - return(n); - } - -@@ -396,7 +416,14 @@ - char config[sizeof("conxxxx=none\0")]; - - sprintf(config, "%s=none", str); -- return(line_setup(lines, num, config, 0)); -+ return(!line_setup(lines, num, config, 0)); -+} -+ -+int line_write_room(struct tty_struct *tty) -+{ -+ struct line *dev = tty->driver_data; -+ -+ return(write_room(dev)); - } - - struct tty_driver *line_register_devfs(struct lines *set, -@@ -412,7 +439,8 @@ - return NULL; - - driver->driver_name = line_driver->name; -- driver->name = line_driver->devfs_name; -+ driver->name = line_driver->device_name; -+ driver->devfs_name = 
line_driver->devfs_name; - driver->major = line_driver->major; - driver->minor_start = line_driver->minor_start; - driver->type = line_driver->type; -@@ -432,7 +460,7 @@ - - for(i = 0; i < nlines; i++){ - if(!lines[i].valid) -- tty_unregister_devfs(driver, i); -+ tty_unregister_device(driver, i); - } - - mconsole_register_dev(&line_driver->mc); -@@ -465,24 +493,25 @@ - struct line *line; - }; - --void winch_interrupt(int irq, void *data, struct pt_regs *unused) -+irqreturn_t winch_interrupt(int irq, void *data, struct pt_regs *unused) - { - struct winch *winch = data; - struct tty_struct *tty; - int err; - char c; - -- err = generic_read(winch->fd, &c, NULL); -- if(err < 0){ -- if(err != -EAGAIN){ -- printk("winch_interrupt : read failed, errno = %d\n", -- -err); -- printk("fd %d is losing SIGWINCH support\n", -- winch->tty_fd); -- free_irq(irq, data); -- return; -+ if(winch->fd != -1){ -+ err = generic_read(winch->fd, &c, NULL); -+ if(err < 0){ -+ if(err != -EAGAIN){ -+ printk("winch_interrupt : read failed, " -+ "errno = %d\n", -err); -+ printk("fd %d is losing SIGWINCH support\n", -+ winch->tty_fd); -+ return(IRQ_HANDLED); -+ } -+ goto out; - } -- goto out; - } - tty = winch->line->tty; - if(tty != NULL){ -@@ -492,7 +521,9 @@ - kill_pg(tty->pgrp, SIGWINCH, 1); - } - out: -- reactivate_fd(winch->fd, WINCH_IRQ); -+ if(winch->fd != -1) -+ reactivate_fd(winch->fd, WINCH_IRQ); -+ return(IRQ_HANDLED); - } - - DECLARE_MUTEX(winch_handler_sem); -@@ -529,7 +560,10 @@ - - list_for_each(ele, &winch_handlers){ - winch = list_entry(ele, struct winch, list); -- close(winch->fd); -+ if(winch->fd != -1){ -+ deactivate_fd(winch->fd, WINCH_IRQ); -+ os_close_file(winch->fd); -+ } - if(winch->pid != -1) - os_kill_process(winch->pid, 1); - } -Index: uml-2.6.7/arch/um/kernel/tt/process_kern.c -=================================================================== ---- uml-2.6.7.orig/arch/um/kernel/tt/process_kern.c 2004-07-16 19:37:20.233949048 +0300 -+++ 
uml-2.6.7/arch/um/kernel/tt/process_kern.c 2004-07-16 19:47:23.746201240 +0300 -@@ -62,7 +62,7 @@ - reading = 0; - err = os_write_file(to->thread.mode.tt.switch_pipe[1], &c, sizeof(c)); - if(err != sizeof(c)) -- panic("write of switch_pipe failed, errno = %d", -err); -+ panic("write of switch_pipe failed, err = %d", -err); - - reading = 1; - if((from->state == TASK_ZOMBIE) || (from->state == TASK_DEAD)) -@@ -104,48 +104,72 @@ - - void release_thread_tt(struct task_struct *task) - { -- os_kill_process(task->thread.mode.tt.extern_pid, 0); -+ int pid = task->thread.mode.tt.extern_pid; -+ -+ if(os_getpid() != pid) -+ os_kill_process(pid, 0); - } - - void exit_thread_tt(void) - { -- close(current->thread.mode.tt.switch_pipe[0]); -- close(current->thread.mode.tt.switch_pipe[1]); -+ os_close_file(current->thread.mode.tt.switch_pipe[0]); -+ os_close_file(current->thread.mode.tt.switch_pipe[1]); - } - - void schedule_tail(task_t *prev); - - static void new_thread_handler(int sig) - { -+ unsigned long disable; - int (*fn)(void *); - void *arg; - - fn = current->thread.request.u.thread.proc; - arg = current->thread.request.u.thread.arg; -+ - UPT_SC(¤t->thread.regs.regs) = (void *) (&sig + 1); -+ disable = (1 << (SIGVTALRM - 1)) | (1 << (SIGALRM - 1)) | -+ (1 << (SIGIO - 1)) | (1 << (SIGPROF - 1)); -+ SC_SIGMASK(UPT_SC(¤t->thread.regs.regs)) &= ~disable; -+ - suspend_new_thread(current->thread.mode.tt.switch_pipe[0]); - -- block_signals(); -+ force_flush_all(); -+ if(current->thread.prev_sched != NULL) -+ schedule_tail(current->thread.prev_sched); -+ current->thread.prev_sched = NULL; -+ - init_new_thread_signals(1); --#ifdef CONFIG_SMP -- schedule_tail(current->thread.prev_sched); --#endif - enable_timer(); - free_page(current->thread.temp_stack); - set_cmdline("(kernel thread)"); -- force_flush_all(); - -- current->thread.prev_sched = NULL; - change_sig(SIGUSR1, 1); - change_sig(SIGVTALRM, 1); - change_sig(SIGPROF, 1); -- unblock_signals(); -+ local_irq_enable(); - 
if(!run_kernel_thread(fn, arg, ¤t->thread.exec_buf)) - do_exit(0); - } - - static int new_thread_proc(void *stack) - { -+ /* local_irq_disable is needed to block out signals until this thread is -+ * properly scheduled. Otherwise, the tracing thread will get mighty -+ * upset about any signals that arrive before that. -+ * This has the complication that it sets the saved signal mask in -+ * the sigcontext to block signals. This gets restored when this -+ * thread (or a descendant, since they get a copy of this sigcontext) -+ * returns to userspace. -+ * So, this is compensated for elsewhere. -+ * XXX There is still a small window until local_irq_disable() actually -+ * finishes where signals are possible - shouldn't be a problem in -+ * practice since SIGIO hasn't been forwarded here yet, and the -+ * local_irq_disable should finish before a SIGVTALRM has time to be -+ * delivered. -+ */ -+ -+ local_irq_disable(); - init_new_thread_stack(stack, new_thread_handler); - os_usr1_process(os_getpid()); - return(0); -@@ -156,7 +180,7 @@ - * itself with a SIGUSR1. set_user_mode has to be run with SIGUSR1 off, - * so it is blocked before it's called. They are re-enabled on sigreturn - * despite the fact that they were blocked when the SIGUSR1 was issued because -- * copy_thread copies the parent's signcontext, including the signal mask -+ * copy_thread copies the parent's sigcontext, including the signal mask - * onto the signal frame. 
- */ - -@@ -165,35 +189,32 @@ - UPT_SC(¤t->thread.regs.regs) = (void *) (&sig + 1); - suspend_new_thread(current->thread.mode.tt.switch_pipe[0]); - --#ifdef CONFIG_SMP -- schedule_tail(NULL); --#endif -+ force_flush_all(); -+ if(current->thread.prev_sched != NULL) -+ schedule_tail(current->thread.prev_sched); -+ current->thread.prev_sched = NULL; -+ - enable_timer(); - change_sig(SIGVTALRM, 1); - local_irq_enable(); -- force_flush_all(); - if(current->mm != current->parent->mm) - protect_memory(uml_reserved, high_physmem - uml_reserved, 1, - 1, 0, 1); -- task_protections((unsigned long) current->thread_info); -- -- current->thread.prev_sched = NULL; -+ task_protections((unsigned long) current_thread); - - free_page(current->thread.temp_stack); -+ local_irq_disable(); - change_sig(SIGUSR1, 0); - set_user_mode(current); - } - --static int sigusr1 = SIGUSR1; -- - int fork_tramp(void *stack) - { -- int sig = sigusr1; -- - local_irq_disable(); -+ arch_init_thread(); - init_new_thread_stack(stack, finish_fork_handler); - -- kill(os_getpid(), sig); -+ os_usr1_process(os_getpid()); - return(0); - } - -@@ -213,8 +234,8 @@ - } - - err = os_pipe(p->thread.mode.tt.switch_pipe, 1, 1); -- if(err){ -- printk("copy_thread : pipe failed, errno = %d\n", -err); -+ if(err < 0){ -+ printk("copy_thread : pipe failed, err = %d\n", -err); - return(err); - } - -@@ -377,8 +398,8 @@ - - pages = (1 << CONFIG_KERNEL_STACK_ORDER); - -- start = (unsigned long) current->thread_info + PAGE_SIZE; -- end = (unsigned long) current + PAGE_SIZE * pages; -+ start = (unsigned long) current_thread + PAGE_SIZE; -+ end = (unsigned long) current_thread + PAGE_SIZE * pages; - protect_memory(uml_reserved, start - uml_reserved, 1, w, 1, 1); - protect_memory(end, high_physmem - end, 1, w, 1, 1); - -@@ -454,8 +475,9 @@ - - init_task.thread.mode.tt.extern_pid = pid; - err = os_pipe(init_task.thread.mode.tt.switch_pipe, 1, 1); -- if(err) panic("Can't create switch pipe for init_task, errno = %d", -- err); -+ 
if(err) -+ panic("Can't create switch pipe for init_task, errno = %d", -+ -err); - } - - int singlestepping_tt(void *t) -Index: uml-2.6.7/arch/um/include/2_5compat.h -=================================================================== ---- uml-2.6.7.orig/arch/um/include/2_5compat.h 2004-07-16 19:36:37.163496752 +0300 -+++ uml-2.6.7/arch/um/include/2_5compat.h 2004-07-16 19:47:23.700208232 +0300 -@@ -6,20 +6,6 @@ - #ifndef __2_5_COMPAT_H__ - #define __2_5_COMPAT_H__ - --#include "linux/version.h" -- --#define INIT_CONSOLE(dev_name, write_proc, device_proc, setup_proc, f) { \ -- name : dev_name, \ -- write : write_proc, \ -- read : NULL, \ -- device : device_proc, \ -- setup : setup_proc, \ -- flags : f, \ -- index : -1, \ -- cflag : 0, \ -- next : NULL \ --} -- - #define INIT_HARDSECT(arr, maj, sizes) - - #define SET_PRI(task) do ; while(0) -Index: uml-2.6.7/fs/hostfs/Makefile -=================================================================== ---- uml-2.6.7.orig/fs/hostfs/Makefile 2004-07-16 19:47:23.631218720 +0300 -+++ uml-2.6.7/fs/hostfs/Makefile 2004-07-16 19:47:23.784195464 +0300 -@@ -0,0 +1,26 @@ -+# -+# Copyright (C) 2000 Jeff Dike (jdike@karaya.com) -+# Licensed under the GPL -+# -+ -+# struct stat64 changed the inode field name between 2.2 and 2.4 from st_ino -+# to __st_ino. It stayed in the same place, so as long as the correct name -+# is used, hostfs compiled on 2.2 should work on 2.4 and vice versa. 
-+ -+STAT64_INO_FIELD := $(shell grep -q __st_ino /usr/include/bits/stat.h && \ -+ echo __)st_ino -+ -+hostfs-objs := hostfs_kern.o hostfs_user.o -+ -+obj-y = -+obj-$(CONFIG_HOSTFS) += hostfs.o -+ -+SINGLE_OBJS = $(foreach f,$(patsubst %.o,%,$(obj-y) $(obj-m)),$($(f)-objs)) -+ -+USER_OBJS := $(filter %_user.o,$(obj-y) $(obj-m) $(SINGLE_OBJS)) -+USER_OBJS := $(foreach file,$(USER_OBJS),$(obj)/$(file)) -+ -+USER_CFLAGS += -DSTAT64_INO_FIELD=$(STAT64_INO_FIELD) -+ -+$(USER_OBJS) : %.o: %.c -+ $(CC) $(CFLAGS_$(notdir $@)) $(USER_CFLAGS) -c -o $@ $< -Index: uml-2.6.7/arch/um/kernel/skas/mmu.c -=================================================================== ---- uml-2.6.7.orig/arch/um/kernel/skas/mmu.c 2004-07-16 19:37:51.994120768 +0300 -+++ uml-2.6.7/arch/um/kernel/skas/mmu.c 2004-07-16 19:47:23.730203672 +0300 -@@ -22,9 +22,11 @@ - else from = -1; - - mm->context.skas.mm_fd = new_mm(from); -- if(mm->context.skas.mm_fd < 0) -- panic("init_new_context_skas - new_mm failed, errno = %d\n", -- mm->context.skas.mm_fd); -+ if(mm->context.skas.mm_fd < 0){ -+ printk("init_new_context_skas - new_mm failed, errno = %d\n", -+ mm->context.skas.mm_fd); -+ return(mm->context.skas.mm_fd); -+ } - - return(0); - } -Index: uml-2.6.7/arch/um/kernel/skas/sys-i386/sigcontext.c -=================================================================== ---- uml-2.6.7.orig/arch/um/kernel/skas/sys-i386/sigcontext.c 2004-07-16 19:35:56.079742432 +0300 -+++ uml-2.6.7/arch/um/kernel/skas/sys-i386/sigcontext.c 2004-07-16 19:47:23.733203216 +0300 -@@ -12,10 +12,9 @@ - #include "kern_util.h" - #include "user.h" - #include "sigcontext.h" -+#include "mode.h" - --extern int userspace_pid; -- --int copy_sc_from_user_skas(union uml_pt_regs *regs, void *from_ptr) -+int copy_sc_from_user_skas(int pid, union uml_pt_regs *regs, void *from_ptr) - { - struct sigcontext sc, *from = from_ptr; - unsigned long fpregs[FP_FRAME_SIZE]; -@@ -41,13 +40,12 @@ - regs->skas.regs[EIP] = sc.eip; - regs->skas.regs[CS] = sc.cs; 
- regs->skas.regs[EFL] = sc.eflags; -- regs->skas.regs[UESP] = sc.esp_at_signal; - regs->skas.regs[SS] = sc.ss; - regs->skas.fault_addr = sc.cr2; - regs->skas.fault_type = FAULT_WRITE(sc.err); - regs->skas.trap_type = sc.trapno; - -- err = ptrace(PTRACE_SETFPREGS, userspace_pid, 0, fpregs); -+ err = ptrace(PTRACE_SETFPREGS, pid, 0, fpregs); - if(err < 0){ - printk("copy_sc_to_user - PTRACE_SETFPREGS failed, " - "errno = %d\n", errno); -@@ -57,8 +55,9 @@ - return(0); - } - --int copy_sc_to_user_skas(void *to_ptr, void *fp, union uml_pt_regs *regs, -- unsigned long fault_addr, int fault_type) -+int copy_sc_to_user_skas(int pid, void *to_ptr, void *fp, -+ union uml_pt_regs *regs, unsigned long fault_addr, -+ int fault_type) - { - struct sigcontext sc, *to = to_ptr; - struct _fpstate *to_fp; -@@ -86,7 +85,7 @@ - sc.err = TO_SC_ERR(fault_type); - sc.trapno = regs->skas.trap_type; - -- err = ptrace(PTRACE_GETFPREGS, userspace_pid, 0, fpregs); -+ err = ptrace(PTRACE_GETFPREGS, pid, 0, fpregs); - if(err < 0){ - printk("copy_sc_to_user - PTRACE_GETFPREGS failed, " - "errno = %d\n", errno); -Index: uml-2.6.7/arch/um/kernel/skas/process.c -=================================================================== ---- uml-2.6.7.orig/arch/um/kernel/skas/process.c 2004-07-16 19:37:38.120229920 +0300 -+++ uml-2.6.7/arch/um/kernel/skas/process.c 2004-07-16 19:47:24.793042096 +0300 -@@ -4,6 +4,7 @@ - */ - - #include -+#include - #include - #include - #include -@@ -24,6 +25,19 @@ - #include "os.h" - #include "proc_mm.h" - #include "skas_ptrace.h" -+#include "chan_user.h" -+#include "signal_user.h" -+ -+int is_skas_winch(int pid, int fd, void *data) -+{ -+ if(pid != getpid()) -+ return(0); -+ -+ register_winch_irq(-1, fd, -1, data); -+ return(1); -+} -+ -+/* These are set once at boot time and not changed thereafter */ - - unsigned long exec_regs[FRAME_SIZE]; - unsigned long exec_fp_regs[HOST_FP_SIZE]; -@@ -43,37 +57,39 @@ - segv(fault.addr, 0, FAULT_WRITE(fault.is_write), 1, NULL); - } - 
--static void handle_trap(int pid, union uml_pt_regs *regs) -+/*To use the same value of using_sysemu as the caller, ask it that value (in local_using_sysemu)*/ -+static void handle_trap(int pid, union uml_pt_regs *regs, int local_using_sysemu) - { - int err, syscall_nr, status; - - syscall_nr = PT_SYSCALL_NR(regs->skas.regs); -+ UPT_SYSCALL_NR(regs) = syscall_nr; - if(syscall_nr < 1){ - relay_signal(SIGTRAP, regs); - return; - } -- UPT_SYSCALL_NR(regs) = syscall_nr; - -- err = ptrace(PTRACE_POKEUSER, pid, PT_SYSCALL_NR_OFFSET, __NR_getpid); -- if(err < 0) -- panic("handle_trap - nullifying syscall failed errno = %d\n", -- errno); -+ if (!local_using_sysemu) -+ { -+ err = ptrace(PTRACE_POKEUSER, pid, PT_SYSCALL_NR_OFFSET, __NR_getpid); -+ if(err < 0) -+ panic("handle_trap - nullifying syscall failed errno = %d\n", -+ errno); - -- err = ptrace(PTRACE_SYSCALL, pid, 0, 0); -- if(err < 0) -- panic("handle_trap - continuing to end of syscall failed, " -- "errno = %d\n", errno); -- -- err = waitpid(pid, &status, WUNTRACED); -- if((err < 0) || !WIFSTOPPED(status) || (WSTOPSIG(status) != SIGTRAP)) -- panic("handle_trap - failed to wait at end of syscall, " -- "errno = %d, status = %d\n", errno, status); -+ err = ptrace(PTRACE_SYSCALL, pid, 0, 0); -+ if(err < 0) -+ panic("handle_trap - continuing to end of syscall failed, " -+ "errno = %d\n", errno); -+ -+ CATCH_EINTR(err = waitpid(pid, &status, WUNTRACED)); -+ if((err < 0) || !WIFSTOPPED(status) || (WSTOPSIG(status) != SIGTRAP)) -+ panic("handle_trap - failed to wait at end of syscall, " -+ "errno = %d, status = %d\n", errno, status); -+ } - - handle_syscall(regs); - } - --int userspace_pid; -- - static int userspace_tramp(void *arg) - { - init_new_thread_signals(0); -@@ -83,7 +99,11 @@ - return(0); - } - --void start_userspace(void) -+/* Each element set once, and only accessed by a single processor anyway */ -+#define NR_CPUS 1 -+int userspace_pid[NR_CPUS]; -+ -+void start_userspace(int cpu) - { - void *stack; - unsigned 
long sp; -@@ -101,7 +121,7 @@ - panic("start_userspace : clone failed, errno = %d", errno); - - do { -- n = waitpid(pid, &status, WUNTRACED); -+ CATCH_EINTR(n = waitpid(pid, &status, WUNTRACED)); - if(n < 0) - panic("start_userspace : wait failed, errno = %d", - errno); -@@ -114,21 +134,27 @@ - if(munmap(stack, PAGE_SIZE) < 0) - panic("start_userspace : munmap failed, errno = %d\n", errno); - -- userspace_pid = pid; -+ userspace_pid[cpu] = pid; - } - - void userspace(union uml_pt_regs *regs) - { -- int err, status, op; -+ int err, status, op, pid = userspace_pid[0]; -+ int local_using_sysemu; /*To prevent races if using_sysemu changes under us.*/ - - restore_registers(regs); - -- err = ptrace(PTRACE_SYSCALL, userspace_pid, 0, 0); -+ local_using_sysemu = get_using_sysemu(); -+ -+ if (local_using_sysemu) -+ err = ptrace(PTRACE_SYSEMU, pid, 0, 0); -+ else -+ err = ptrace(PTRACE_SYSCALL, pid, 0, 0); - if(err) - panic("userspace - PTRACE_SYSCALL failed, errno = %d\n", - errno); - while(1){ -- err = waitpid(userspace_pid, &status, WUNTRACED); -+ CATCH_EINTR(err = waitpid(pid, &status, WUNTRACED)); - if(err < 0) - panic("userspace - waitpid failed, errno = %d\n", - errno); -@@ -139,16 +165,17 @@ - if(WIFSTOPPED(status)){ - switch(WSTOPSIG(status)){ - case SIGSEGV: -- handle_segv(userspace_pid); -+ handle_segv(pid); - break; - case SIGTRAP: -- handle_trap(userspace_pid, regs); -+ handle_trap(pid, regs, local_using_sysemu); - break; - case SIGIO: - case SIGVTALRM: - case SIGILL: - case SIGBUS: - case SIGFPE: -+ case SIGWINCH: - user_signal(WSTOPSIG(status), regs); - break; - default: -@@ -160,9 +187,17 @@ - - restore_registers(regs); - -- op = singlestepping_skas() ? PTRACE_SINGLESTEP : -- PTRACE_SYSCALL; -- err = ptrace(op, userspace_pid, 0, 0); -+ /*Now we ended the syscall, so re-read local_using_sysemu.*/ -+ local_using_sysemu = get_using_sysemu(); -+ -+ if (local_using_sysemu) -+ op = singlestepping_skas() ? 
PTRACE_SINGLESTEP : -+ PTRACE_SYSEMU; -+ else -+ op = singlestepping_skas() ? PTRACE_SINGLESTEP : -+ PTRACE_SYSCALL; -+ -+ err = ptrace(op, pid, 0, 0); - if(err) - panic("userspace - PTRACE_SYSCALL failed, " - "errno = %d\n", errno); -@@ -172,13 +207,25 @@ - void new_thread(void *stack, void **switch_buf_ptr, void **fork_buf_ptr, - void (*handler)(int)) - { -+ unsigned long flags; - jmp_buf switch_buf, fork_buf; - - *switch_buf_ptr = &switch_buf; - *fork_buf_ptr = &fork_buf; - -- if(setjmp(fork_buf) == 0) -+ /* Somewhat subtle - siglongjmp restores the signal mask before doing -+ * the longjmp. This means that when jumping from one stack to another -+ * when the target stack has interrupts enabled, an interrupt may occur -+ * on the source stack. This is bad when starting up a process because -+ * it's not supposed to get timer ticks until it has been scheduled. -+ * So, we disable interrupts around the sigsetjmp to ensure that -+ * they can't happen until we get back here where they are safe. 
-+ */ -+ flags = get_signals(); -+ block_signals(); -+ if(sigsetjmp(fork_buf, 1) == 0) - new_thread_proc(stack, handler); -+ set_signals(flags); - - remove_sigstack(); - } -@@ -189,16 +236,16 @@ - - *switch_buf = &buf; - fork_buf = fb; -- if(setjmp(buf) == 0) -- longjmp(*fork_buf, 1); -+ if(sigsetjmp(buf, 1) == 0) -+ siglongjmp(*fork_buf, 1); - } - --static int move_registers(int int_op, int fp_op, union uml_pt_regs *regs, -- unsigned long *fp_regs) -+static int move_registers(int pid, int int_op, int fp_op, -+ union uml_pt_regs *regs, unsigned long *fp_regs) - { -- if(ptrace(int_op, userspace_pid, 0, regs->skas.regs) < 0) -+ if(ptrace(int_op, pid, 0, regs->skas.regs) < 0) - return(-errno); -- if(ptrace(fp_op, userspace_pid, 0, fp_regs) < 0) -+ if(ptrace(fp_op, pid, 0, fp_regs) < 0) - return(-errno); - return(0); - } -@@ -217,10 +264,11 @@ - fp_regs = regs->skas.fp; - } - -- err = move_registers(PTRACE_GETREGS, fp_op, regs, fp_regs); -+ err = move_registers(userspace_pid[0], PTRACE_GETREGS, fp_op, regs, -+ fp_regs); - if(err) - panic("save_registers - saving registers failed, errno = %d\n", -- err); -+ -err); - } - - void restore_registers(union uml_pt_regs *regs) -@@ -237,10 +285,11 @@ - fp_regs = regs->skas.fp; - } - -- err = move_registers(PTRACE_SETREGS, fp_op, regs, fp_regs); -+ err = move_registers(userspace_pid[0], PTRACE_SETREGS, fp_op, regs, -+ fp_regs); - if(err) - panic("restore_registers - saving registers failed, " -- "errno = %d\n", err); -+ "errno = %d\n", -err); - } - - void switch_threads(void *me, void *next) -@@ -248,8 +297,8 @@ - jmp_buf my_buf, **me_ptr = me, *next_buf = next; - - *me_ptr = &my_buf; -- if(setjmp(my_buf) == 0) -- longjmp(*next_buf, 1); -+ if(sigsetjmp(my_buf, 1) == 0) -+ siglongjmp(*next_buf, 1); - } - - static jmp_buf initial_jmpbuf; -@@ -265,14 +314,14 @@ - int n; - - *fork_buf_ptr = &initial_jmpbuf; -- n = setjmp(initial_jmpbuf); -+ n = sigsetjmp(initial_jmpbuf, 1); - if(n == 0) - new_thread_proc((void *) stack, 
new_thread_handler); - else if(n == 1) - remove_sigstack(); - else if(n == 2){ - (*cb_proc)(cb_arg); -- longjmp(*cb_back, 1); -+ siglongjmp(*cb_back, 1); - } - else if(n == 3){ - kmalloc_ok = 0; -@@ -282,7 +331,7 @@ - kmalloc_ok = 0; - return(1); - } -- longjmp(**switch_buf, 1); -+ siglongjmp(**switch_buf, 1); - } - - void remove_sigstack(void) -@@ -304,8 +353,8 @@ - cb_back = &here; - - block_signals(); -- if(setjmp(here) == 0) -- longjmp(initial_jmpbuf, 2); -+ if(sigsetjmp(here, 1) == 0) -+ siglongjmp(initial_jmpbuf, 2); - unblock_signals(); - - cb_proc = NULL; -@@ -316,22 +365,23 @@ - void halt_skas(void) - { - block_signals(); -- longjmp(initial_jmpbuf, 3); -+ siglongjmp(initial_jmpbuf, 3); - } - - void reboot_skas(void) - { - block_signals(); -- longjmp(initial_jmpbuf, 4); -+ siglongjmp(initial_jmpbuf, 4); - } - - int new_mm(int from) - { - struct proc_mm_op copy; -- int n, fd = os_open_file("/proc/mm", of_write(OPENFLAGS()), 0); -+ int n, fd = os_open_file("/proc/mm", -+ of_cloexec(of_write(OPENFLAGS())), 0); - - if(fd < 0) -- return(-errno); -+ return(fd); - - if(from != -1){ - copy = ((struct proc_mm_op) { .op = MM_COPY_SEGMENTS, -@@ -340,8 +390,9 @@ - n = os_write_file(fd, ©, sizeof(copy)); - if(n != sizeof(copy)) - printk("new_mm : /proc/mm copy_segments failed, " -- "errno = %d\n", errno); -+ "err = %d\n", -n); - } -+ - return(fd); - } - -@@ -349,7 +400,8 @@ - { - int err; - -- err = ptrace(PTRACE_SWITCH_MM, userspace_pid, 0, mm_fd); -+#warning need cpu pid in switch_mm_skas -+ err = ptrace(PTRACE_SWITCH_MM, userspace_pid[0], 0, mm_fd); - if(err) - panic("switch_mm_skas - PTRACE_SWITCH_MM failed, errno = %d\n", - errno); -@@ -357,7 +409,8 @@ - - void kill_off_processes_skas(void) - { -- os_kill_process(userspace_pid, 1); -+#warning need to loop over userspace_pids in kill_off_processes_skas -+ os_kill_process(userspace_pid[0], 1); - } - - void init_registers(int pid) -Index: uml-2.6.7/arch/um/os-Linux/tty.c 
-=================================================================== ---- uml-2.6.7.orig/arch/um/os-Linux/tty.c 2004-07-16 19:36:54.138916096 +0300 -+++ uml-2.6.7/arch/um/os-Linux/tty.c 2004-07-16 19:47:23.769197744 +0300 -@@ -28,10 +28,10 @@ - struct grantpt_info info; - int fd; - -- if((fd = os_open_file("/dev/ptmx", of_rdwr(OPENFLAGS()), 0)) < 0){ -- printk("get_pty : Couldn't open /dev/ptmx - errno = %d\n", -- errno); -- return(-1); -+ fd = os_open_file("/dev/ptmx", of_rdwr(OPENFLAGS()), 0); -+ if(fd < 0){ -+ printk("get_pty : Couldn't open /dev/ptmx - err = %d\n", -fd); -+ return(fd); - } - - info.fd = fd; -@@ -39,7 +39,7 @@ - - if(info.res < 0){ - printk("get_pty : Couldn't grant pty - errno = %d\n", -- info.err); -+ -info.err); - return(-1); - } - if(unlockpt(fd) < 0){ -Index: uml-2.6.7/arch/um/sys-ia64/Makefile -=================================================================== ---- uml-2.6.7.orig/arch/um/sys-ia64/Makefile 2004-07-16 19:35:55.505829680 +0300 -+++ uml-2.6.7/arch/um/sys-ia64/Makefile 2004-07-16 19:47:23.778196376 +0300 -@@ -7,18 +7,5 @@ - $(OBJ): $(OBJS) - rm -f $@ - $(LD) $(LINKFLAGS) --start-group $^ --end-group -o $@ --clean: -- rm -f $(OBJS) - --fastdep: -- --archmrproper: -- --archclean: -- rm -f link.ld -- @$(MAKEBOOT) clean -- --archdep: -- @$(MAKEBOOT) dep -- --modules: -+clean-files := $(OBJS) link.ld -Index: uml-2.6.7/arch/um/sys-i386/util/mk_sc.c -=================================================================== ---- uml-2.6.7.orig/arch/um/sys-i386/util/mk_sc.c 2004-07-16 19:37:26.086059392 +0300 -+++ uml-2.6.7/arch/um/sys-i386/util/mk_sc.c 2004-07-16 19:47:23.778196376 +0300 -@@ -38,6 +38,7 @@ - SC_OFFSET("SC_ERR", err); - SC_OFFSET("SC_CR2", cr2); - SC_OFFSET("SC_FPSTATE", fpstate); -+ SC_OFFSET("SC_SIGMASK", oldmask); - SC_FP_OFFSET("SC_FP_CW", cw); - SC_FP_OFFSET("SC_FP_SW", sw); - SC_FP_OFFSET("SC_FP_TAG", tag); -Index: uml-2.6.7/arch/um/sys-i386/Makefile -=================================================================== 
---- uml-2.6.7.orig/arch/um/sys-i386/Makefile 2004-07-16 19:36:05.106370176 +0300 -+++ uml-2.6.7/arch/um/sys-i386/Makefile 2004-07-16 19:47:23.772197288 +0300 -@@ -1,7 +1,8 @@ --obj-y = bugs.o checksum.o extable.o fault.o ksyms.o ldt.o module.o \ -- ptrace.o ptrace_user.o semaphore.o sigcontext.o syscalls.o sysrq.o -+obj-y = bugs.o checksum.o fault.o ksyms.o ldt.o ptrace.o ptrace_user.o \ -+ semaphore.o sigcontext.o syscalls.o sysrq.o time.o - - obj-$(CONFIG_HIGHMEM) += highmem.o -+obj-$(CONFIG_MODULES) += module.o - - USER_OBJS := bugs.o ptrace_user.o sigcontext.o fault.o - USER_OBJS := $(foreach file,$(USER_OBJS),$(obj)/$(file)) -@@ -9,6 +10,8 @@ - SYMLINKS = semaphore.c highmem.c module.c - SYMLINKS := $(foreach f,$(SYMLINKS),$(src)/$f) - -+clean-files := $(SYMLINKS) -+ - semaphore.c-dir = kernel - highmem.c-dir = mm - module.c-dir = kernel -@@ -24,19 +27,4 @@ - $(SYMLINKS): - $(call make_link,$@) - --clean: -- $(MAKE) -C util clean -- --fastdep: -- --dep: -- --archmrproper: -- rm -f $(SYMLINKS) -- --archclean: -- --archdep: -- --modules: -- -+subdir- := util -Index: uml-2.6.7/arch/um/kernel/signal_kern.c -=================================================================== ---- uml-2.6.7.orig/arch/um/kernel/signal_kern.c 2004-07-16 19:36:51.321344432 +0300 -+++ uml-2.6.7/arch/um/kernel/signal_kern.c 2004-07-16 19:47:23.726204280 +0300 -@@ -36,7 +36,7 @@ - if(sig == SIGSEGV){ - struct k_sigaction *ka; - -- ka = ¤t->sig->action[SIGSEGV - 1]; -+ ka = ¤t->sighand->action[SIGSEGV - 1]; - ka->sa.sa_handler = SIG_DFL; - } - force_sig(SIGSEGV, current); -@@ -60,10 +60,10 @@ - int err, ret; - - ret = 0; -+ /* Always make any pending restarted system calls return -EINTR */ -+ current_thread_info()->restart_block.fn = do_no_restart_syscall; - switch(error){ - case -ERESTART_RESTARTBLOCK: -- current_thread_info()->restart_block.fn = -- do_no_restart_syscall; - case -ERESTARTNOHAND: - ret = -EINTR; - break; -@@ -142,7 +142,7 @@ - return(0); - - /* Whee! 
Actually deliver the signal. */ -- ka = ¤t->sig->action[sig -1 ]; -+ ka = ¤t->sighand->action[sig -1 ]; - err = handle_signal(regs, sig, ka, &info, oldset, error); - if(!err) return(1); - -@@ -201,7 +201,7 @@ - } - } - --int sys_rt_sigsuspend(sigset_t *unewset, size_t sigsetsize) -+int sys_rt_sigsuspend(sigset_t __user *unewset, size_t sigsetsize) - { - sigset_t saveset, newset; - -@@ -227,20 +227,59 @@ - } - } - -+int sys_sigaction(int sig, const struct old_sigaction __user *act, -+ struct old_sigaction __user *oact) -+{ -+ struct k_sigaction new_ka, old_ka; -+ int ret; -+ -+ if (act) { -+ old_sigset_t mask; -+ if (verify_area(VERIFY_READ, act, sizeof(*act)) || -+ __get_user(new_ka.sa.sa_handler, &act->sa_handler) || -+ __get_user(new_ka.sa.sa_restorer, &act->sa_restorer)) -+ return -EFAULT; -+ __get_user(new_ka.sa.sa_flags, &act->sa_flags); -+ __get_user(mask, &act->sa_mask); -+ siginitset(&new_ka.sa.sa_mask, mask); -+ } -+ -+ ret = do_sigaction(sig, act ? &new_ka : NULL, oact ? &old_ka : NULL); -+ -+ if (!ret && oact) { -+ if (verify_area(VERIFY_WRITE, oact, sizeof(*oact)) || -+ __put_user(old_ka.sa.sa_handler, &oact->sa_handler) || -+ __put_user(old_ka.sa.sa_restorer, &oact->sa_restorer)) -+ return -EFAULT; -+ __put_user(old_ka.sa.sa_flags, &oact->sa_flags); -+ __put_user(old_ka.sa.sa_mask.sig[0], &oact->sa_mask); -+ } -+ -+ return ret; -+} -+ -+int sys_sigaltstack(const stack_t *uss, stack_t *uoss) -+{ -+ return(do_sigaltstack(uss, uoss, PT_REGS_SP(¤t->thread.regs))); -+} -+ -+extern int userspace_pid[]; -+ - static int copy_sc_from_user(struct pt_regs *to, void *from, - struct arch_frame_data *arch) - { - int ret; - - ret = CHOOSE_MODE(copy_sc_from_user_tt(UPT_SC(&to->regs), from, arch), -- copy_sc_from_user_skas(&to->regs, from)); -+ copy_sc_from_user_skas(userspace_pid[0], -+ &to->regs, from)); - return(ret); - } - - int sys_sigreturn(struct pt_regs regs) - { -- void *sc = sp_to_sc(PT_REGS_SP(¤t->thread.regs)); -- void *mask = 
sp_to_mask(PT_REGS_SP(¤t->thread.regs)); -+ void __user *sc = sp_to_sc(PT_REGS_SP(¤t->thread.regs)); -+ void __user *mask = sp_to_mask(PT_REGS_SP(¤t->thread.regs)); - int sig_size = (_NSIG_WORDS - 1) * sizeof(unsigned long); - - spin_lock_irq(¤t->sighand->siglock); -@@ -257,8 +296,8 @@ - - int sys_rt_sigreturn(struct pt_regs regs) - { -- struct ucontext *uc = sp_to_uc(PT_REGS_SP(¤t->thread.regs)); -- void *fp; -+ unsigned long sp = PT_REGS_SP(¤t->thread.regs); -+ struct ucontext __user *uc = sp_to_uc(sp); - int sig_size = _NSIG_WORDS * sizeof(unsigned long); - - spin_lock_irq(¤t->sighand->siglock); -@@ -266,7 +305,6 @@ - sigdelsetmask(¤t->blocked, ~_BLOCKABLE); - recalc_sigpending(); - spin_unlock_irq(¤t->sighand->siglock); -- fp = (void *) (((unsigned long) uc) + sizeof(struct ucontext)); - copy_sc_from_user(¤t->thread.regs, &uc->uc_mcontext, - &signal_frame_si.common.arch); - return(PT_REGS_SYSCALL_RET(¤t->thread.regs)); -Index: uml-2.6.7/arch/um/kernel/sysrq.c -=================================================================== ---- uml-2.6.7.orig/arch/um/kernel/sysrq.c 2004-07-16 19:36:24.697391888 +0300 -+++ uml-2.6.7/arch/um/kernel/sysrq.c 2004-07-16 19:47:23.739202304 +0300 -@@ -44,6 +44,11 @@ - } - EXPORT_SYMBOL(dump_stack); - -+void show_stack(struct task_struct *task, unsigned long *sp) -+{ -+ show_trace(sp); -+} -+ - /* - * Overrides for Emacs so that we follow Linus's tabbing style. 
- * Emacs will notice this stuff at the end of the file and automatically -Index: uml-2.6.7/arch/um/include/sysdep-i386/syscalls.h -=================================================================== ---- uml-2.6.7.orig/arch/um/include/sysdep-i386/syscalls.h 2004-07-16 19:36:04.873405592 +0300 -+++ uml-2.6.7/arch/um/include/sysdep-i386/syscalls.h 2004-07-16 19:47:23.708207016 +0300 -@@ -11,39 +11,34 @@ - #define EXECUTE_SYSCALL(syscall, regs) \ - ((long (*)(struct syscall_args)) (*sys_call_table[syscall]))(SYSCALL_ARGS(®s->regs)) - --extern syscall_handler_t sys_modify_ldt; --extern syscall_handler_t old_mmap_i386; --extern syscall_handler_t old_select; --extern syscall_handler_t sys_ni_syscall; -- - #define ARCH_SYSCALLS \ -- [ __NR_mmap ] = old_mmap_i386, \ -- [ __NR_select ] = old_select, \ -- [ __NR_vm86old ] = sys_ni_syscall, \ -- [ __NR_modify_ldt ] = sys_modify_ldt, \ -- [ __NR_lchown32 ] = sys_lchown, \ -- [ __NR_getuid32 ] = sys_getuid, \ -- [ __NR_getgid32 ] = sys_getgid, \ -- [ __NR_geteuid32 ] = sys_geteuid, \ -- [ __NR_getegid32 ] = sys_getegid, \ -- [ __NR_setreuid32 ] = sys_setreuid, \ -- [ __NR_setregid32 ] = sys_setregid, \ -- [ __NR_getgroups32 ] = sys_getgroups, \ -- [ __NR_setgroups32 ] = sys_setgroups, \ -- [ __NR_fchown32 ] = sys_fchown, \ -- [ __NR_setresuid32 ] = sys_setresuid, \ -- [ __NR_getresuid32 ] = sys_getresuid, \ -- [ __NR_setresgid32 ] = sys_setresgid, \ -- [ __NR_getresgid32 ] = sys_getresgid, \ -- [ __NR_chown32 ] = sys_chown, \ -- [ __NR_setuid32 ] = sys_setuid, \ -- [ __NR_setgid32 ] = sys_setgid, \ -- [ __NR_setfsuid32 ] = sys_setfsuid, \ -- [ __NR_setfsgid32 ] = sys_setfsgid, \ -- [ __NR_pivot_root ] = sys_pivot_root, \ -- [ __NR_mincore ] = sys_mincore, \ -- [ __NR_madvise ] = sys_madvise, \ -- [ 222 ] = sys_ni_syscall, -+ [ __NR_mmap ] = (syscall_handler_t *) old_mmap_i386, \ -+ [ __NR_select ] = (syscall_handler_t *) old_select, \ -+ [ __NR_vm86old ] = (syscall_handler_t *) sys_ni_syscall, \ -+ [ __NR_modify_ldt ] = 
(syscall_handler_t *) sys_modify_ldt, \ -+ [ __NR_lchown32 ] = (syscall_handler_t *) sys_lchown, \ -+ [ __NR_getuid32 ] = (syscall_handler_t *) sys_getuid, \ -+ [ __NR_getgid32 ] = (syscall_handler_t *) sys_getgid, \ -+ [ __NR_geteuid32 ] = (syscall_handler_t *) sys_geteuid, \ -+ [ __NR_getegid32 ] = (syscall_handler_t *) sys_getegid, \ -+ [ __NR_setreuid32 ] = (syscall_handler_t *) sys_setreuid, \ -+ [ __NR_setregid32 ] = (syscall_handler_t *) sys_setregid, \ -+ [ __NR_getgroups32 ] = (syscall_handler_t *) sys_getgroups, \ -+ [ __NR_setgroups32 ] = (syscall_handler_t *) sys_setgroups, \ -+ [ __NR_fchown32 ] = (syscall_handler_t *) sys_fchown, \ -+ [ __NR_setresuid32 ] = (syscall_handler_t *) sys_setresuid, \ -+ [ __NR_getresuid32 ] = (syscall_handler_t *) sys_getresuid, \ -+ [ __NR_setresgid32 ] = (syscall_handler_t *) sys_setresgid, \ -+ [ __NR_getresgid32 ] = (syscall_handler_t *) sys_getresgid, \ -+ [ __NR_chown32 ] = (syscall_handler_t *) sys_chown, \ -+ [ __NR_setuid32 ] = (syscall_handler_t *) sys_setuid, \ -+ [ __NR_setgid32 ] = (syscall_handler_t *) sys_setgid, \ -+ [ __NR_setfsuid32 ] = (syscall_handler_t *) sys_setfsuid, \ -+ [ __NR_setfsgid32 ] = (syscall_handler_t *) sys_setfsgid, \ -+ [ __NR_pivot_root ] = (syscall_handler_t *) sys_pivot_root, \ -+ [ __NR_mincore ] = (syscall_handler_t *) sys_mincore, \ -+ [ __NR_madvise ] = (syscall_handler_t *) sys_madvise, \ -+ [ 222 ] = (syscall_handler_t *) sys_ni_syscall, - - /* 222 doesn't yet have a name in include/asm-i386/unistd.h */ - -Index: uml-2.6.7/include/linux/time.h -=================================================================== ---- uml-2.6.7.orig/include/linux/time.h 2004-07-16 19:36:57.136460400 +0300 -+++ uml-2.6.7/include/linux/time.h 2004-07-16 19:47:23.805192272 +0300 -@@ -41,7 +41,7 @@ - * Have the 32 bit jiffies value wrap 5 minutes after boot - * so jiffies wrap bugs show up earlier. 
- */ --#define INITIAL_JIFFIES ((unsigned long)(unsigned int) (-300*HZ)) -+#define INITIAL_JIFFIES ((unsigned long)(0)) - - /* - * Change timeval to jiffies, trying to avoid the -Index: uml-2.6.7/include/asm-um/page.h -=================================================================== ---- uml-2.6.7.orig/include/asm-um/page.h 2004-07-16 19:36:51.390333944 +0300 -+++ uml-2.6.7/include/asm-um/page.h 2004-07-16 19:47:23.792194248 +0300 -@@ -1,10 +1,14 @@ -+/* -+ * Copyright (C) 2000 - 2003 Jeff Dike (jdike@addtoit.com) -+ * Licensed under the GPL -+ */ -+ - #ifndef __UM_PAGE_H - #define __UM_PAGE_H - - struct page; - - #include "asm/arch/page.h" --#include "asm/bug.h" - - #undef __pa - #undef __va -@@ -24,25 +28,36 @@ - - #define __va_space (8*1024*1024) - --extern unsigned long region_pa(void *virt); --extern void *region_va(unsigned long phys); -- --#define __pa(virt) region_pa((void *) (virt)) --#define __va(phys) region_va((unsigned long) (phys)) -- --extern unsigned long page_to_pfn(struct page *page); --extern struct page *pfn_to_page(unsigned long pfn); -+extern unsigned long to_phys(void *virt); -+extern void *to_virt(unsigned long phys); - --extern struct page *phys_to_page(unsigned long phys); -+#define __pa(virt) to_phys((void *) virt) -+#define __va(phys) to_virt((unsigned long) phys) - --#define virt_to_page(v) (phys_to_page(__pa(v))) -+#define page_to_pfn(page) ((page) - mem_map) -+#define pfn_to_page(pfn) (mem_map + (pfn)) - --extern struct page *page_mem_map(struct page *page); -+#define phys_to_pfn(p) ((p) >> PAGE_SHIFT) -+#define pfn_to_phys(pfn) ((pfn) << PAGE_SHIFT) - --#define pfn_valid(pfn) (page_mem_map(pfn_to_page(pfn)) != NULL) --#define virt_addr_valid(v) pfn_valid(__pa(v) >> PAGE_SHIFT) -+#define pfn_valid(pfn) ((pfn) < max_mapnr) -+#define virt_addr_valid(v) pfn_valid(phys_to_pfn(__pa(v))) - - extern struct page *arch_validate(struct page *page, int mask, int order); - #define HAVE_ARCH_VALIDATE - -+extern void arch_free_page(struct page 
*page, int order); -+#define HAVE_ARCH_FREE_PAGE -+ - #endif -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -Index: uml-2.6.7/arch/um/include/signal_user.h -=================================================================== ---- uml-2.6.7.orig/arch/um/include/signal_user.h 2004-07-16 19:35:55.511828768 +0300 -+++ uml-2.6.7/arch/um/include/signal_user.h 2004-07-16 19:47:23.705207472 +0300 -@@ -11,6 +11,8 @@ - extern int change_sig(int signal, int on); - extern void set_sigstack(void *stack, int size); - extern void set_handler(int sig, void (*handler)(int), int flags, ...); -+extern int set_signals(int enable); -+extern int get_signals(void); - - #endif - -Index: uml-2.6.7/include/asm-um/local.h -=================================================================== ---- uml-2.6.7.orig/include/asm-um/local.h 2004-07-16 19:47:23.633218416 +0300 -+++ uml-2.6.7/include/asm-um/local.h 2004-07-16 19:47:23.791194400 +0300 -@@ -0,0 +1,6 @@ -+#ifndef __UM_LOCAL_H -+#define __UM_LOCAL_H -+ -+#include "asm/arch/local.h" -+ -+#endif -Index: uml-2.6.7/include/asm-um/thread_info.h -=================================================================== ---- uml-2.6.7.orig/include/asm-um/thread_info.h 2004-07-16 19:36:22.451733280 +0300 -+++ uml-2.6.7/include/asm-um/thread_info.h 2004-07-16 19:47:23.803192576 +0300 -@@ -9,6 +9,7 @@ - #ifndef __ASSEMBLY__ - - #include -+#include - - struct thread_info { - struct task_struct *task; /* main task structure */ -@@ -43,15 +44,18 @@ - static inline struct thread_info *current_thread_info(void) - { - struct thread_info *ti; -- __asm__("andl %%esp,%0; ":"=r" (ti) : "0" (~16383UL)); -+ unsigned long mask = 
PAGE_SIZE * -+ (1 << CONFIG_KERNEL_STACK_ORDER) - 1; -+ __asm__("andl %%esp,%0; ":"=r" (ti) : "0" (~mask)); - return ti; - } - - /* thread information allocation */ --#define THREAD_SIZE (4*PAGE_SIZE) --#define alloc_thread_info(tsk) ((struct thread_info *) \ -- __get_free_pages(GFP_KERNEL,2)) --#define free_thread_info(ti) free_pages((unsigned long) (ti), 2) -+#define THREAD_SIZE ((1 << CONFIG_KERNEL_STACK_ORDER) * PAGE_SIZE) -+#define alloc_thread_info(tsk) \ -+ ((struct thread_info *) kmalloc(THREAD_SIZE, GFP_KERNEL)) -+#define free_thread_info(ti) kfree(ti) -+ - #define get_thread_info(ti) get_task_struct((ti)->task) - #define put_thread_info(ti) put_task_struct((ti)->task) - -@@ -65,11 +69,13 @@ - #define TIF_POLLING_NRFLAG 3 /* true if poll_idle() is polling - * TIF_NEED_RESCHED - */ -+#define TIF_RESTART_BLOCK 4 - - #define _TIF_SYSCALL_TRACE (1 << TIF_SYSCALL_TRACE) - #define _TIF_SIGPENDING (1 << TIF_SIGPENDING) - #define _TIF_NEED_RESCHED (1 << TIF_NEED_RESCHED) - #define _TIF_POLLING_NRFLAG (1 << TIF_POLLING_NRFLAG) -+#define _TIF_RESTART_BLOCK (1 << TIF_RESTART_BLOCK) - - #endif - -Index: uml-2.6.7/include/asm-um/dma-mapping.h -=================================================================== ---- uml-2.6.7.orig/include/asm-um/dma-mapping.h 2004-07-16 19:36:02.148819792 +0300 -+++ uml-2.6.7/include/asm-um/dma-mapping.h 2004-07-16 19:47:23.789194704 +0300 -@@ -1 +1,119 @@ --#include -+#ifndef _ASM_DMA_MAPPING_H -+#define _ASM_DMA_MAPPING_H -+ -+static inline int -+dma_supported(struct device *dev, u64 mask) -+{ -+ BUG(); -+ return(0); -+} -+ -+static inline int -+dma_set_mask(struct device *dev, u64 dma_mask) -+{ -+ BUG(); -+ return(0); -+} -+ -+static inline void * -+dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle, -+ int flag) -+{ -+ BUG(); -+ return((void *) 0); -+} -+ -+static inline void -+dma_free_coherent(struct device *dev, size_t size, void *cpu_addr, -+ dma_addr_t dma_handle) -+{ -+ BUG(); -+} -+ -+static inline 
dma_addr_t -+dma_map_single(struct device *dev, void *cpu_addr, size_t size, -+ enum dma_data_direction direction) -+{ -+ BUG(); -+ return(0); -+} -+ -+static inline void -+dma_unmap_single(struct device *dev, dma_addr_t dma_addr, size_t size, -+ enum dma_data_direction direction) -+{ -+ BUG(); -+} -+ -+static inline dma_addr_t -+dma_map_page(struct device *dev, struct page *page, -+ unsigned long offset, size_t size, -+ enum dma_data_direction direction) -+{ -+ BUG(); -+ return(0); -+} -+ -+static inline void -+dma_unmap_page(struct device *dev, dma_addr_t dma_address, size_t size, -+ enum dma_data_direction direction) -+{ -+ BUG(); -+} -+ -+static inline int -+dma_map_sg(struct device *dev, struct scatterlist *sg, int nents, -+ enum dma_data_direction direction) -+{ -+ BUG(); -+ return(0); -+} -+ -+static inline void -+dma_unmap_sg(struct device *dev, struct scatterlist *sg, int nhwentries, -+ enum dma_data_direction direction) -+{ -+ BUG(); -+} -+ -+static inline void -+dma_sync_single(struct device *dev, dma_addr_t dma_handle, size_t size, -+ enum dma_data_direction direction) -+{ -+ BUG(); -+} -+ -+static inline void -+dma_sync_sg(struct device *dev, struct scatterlist *sg, int nelems, -+ enum dma_data_direction direction) -+{ -+ BUG(); -+} -+ -+#define dma_alloc_noncoherent(d, s, h, f) dma_alloc_coherent(d, s, h, f) -+#define dma_free_noncoherent(d, s, v, h) dma_free_coherent(d, s, v, h) -+#define dma_is_consistent(d) (1) -+ -+static inline int -+dma_get_cache_alignment(void) -+{ -+ BUG(); -+ return(0); -+} -+ -+static inline void -+dma_sync_single_range(struct device *dev, dma_addr_t dma_handle, -+ unsigned long offset, size_t size, -+ enum dma_data_direction direction) -+{ -+ BUG(); -+} -+ -+static inline void -+dma_cache_sync(void *vaddr, size_t size, -+ enum dma_data_direction direction) -+{ -+ BUG(); -+} -+ -+#endif -Index: uml-2.6.7/arch/um/kernel/skas/syscall_user.c -=================================================================== ---- 
uml-2.6.7.orig/arch/um/kernel/skas/syscall_user.c 2004-07-16 19:37:35.225669960 +0300 -+++ uml-2.6.7/arch/um/kernel/skas/syscall_user.c 2004-07-16 19:47:23.732203368 +0300 -@@ -22,7 +22,7 @@ - - index = record_syscall_start(UPT_SYSCALL_NR(regs)); - -- syscall_trace(); -+ syscall_trace(regs, 1); - result = execute_syscall(regs); - - REGS_SET_SYSCALL_RETURN(regs->skas.regs, result); -@@ -30,7 +30,7 @@ - (result == -ERESTARTNOINTR)) - do_signal(result); - -- syscall_trace(); -+ syscall_trace(regs, 0); - record_syscall_end(index, result); - } - -Index: uml-2.6.7/arch/um/kernel/process.c -=================================================================== ---- uml-2.6.7.orig/arch/um/kernel/process.c 2004-07-16 19:37:22.968533328 +0300 -+++ uml-2.6.7/arch/um/kernel/process.c 2004-07-16 19:47:24.795041792 +0300 -@@ -9,18 +9,17 @@ - #include - #include - #include --#include - #include - #include - #include - #include --#include - #include - #include - #include - #include - #include - #include -+#include - #include "user_util.h" - #include "kern_util.h" - #include "user.h" -@@ -58,7 +57,11 @@ - { - int flags = altstack ? SA_ONSTACK : 0; - -- set_handler(SIGSEGV, (__sighandler_t) sig_handler, flags, -+ /* NODEFER is set here because SEGV isn't turned back on when the -+ * handler is ready to receive signals. This causes any segfault -+ * during a copy_user to kill the process because the fault is blocked. 
-+ */ -+ set_handler(SIGSEGV, (__sighandler_t) sig_handler, flags | SA_NODEFER, - SIGUSR1, SIGIO, SIGWINCH, SIGALRM, SIGVTALRM, -1); - set_handler(SIGTRAP, (__sighandler_t) sig_handler, flags, - SIGUSR1, SIGIO, SIGWINCH, SIGALRM, SIGVTALRM, -1); -@@ -72,7 +75,6 @@ - SIGUSR1, SIGIO, SIGWINCH, SIGALRM, SIGVTALRM, -1); - set_handler(SIGUSR2, (__sighandler_t) sig_handler, - SA_NOMASK | flags, -1); -- (void) CHOOSE_MODE(signal(SIGCHLD, SIG_IGN), (void *) 0); - signal(SIGHUP, SIG_IGN); - - init_irq_signals(altstack); -@@ -123,11 +125,12 @@ - /* Start the process and wait for it to kill itself */ - new_pid = clone(outer_tramp, (void *) sp, clone_flags, &arg); - if(new_pid < 0) return(-errno); -- while((err = waitpid(new_pid, &status, 0) < 0) && (errno == EINTR)) ; -+ CATCH_EINTR(err = waitpid(new_pid, &status, 0)); - if(err < 0) panic("Waiting for outer trampoline failed - errno = %d", - errno); - if(!WIFSIGNALED(status) || (WTERMSIG(status) != SIGKILL)) -- panic("outer trampoline didn't exit with SIGKILL"); -+ panic("outer trampoline didn't exit with SIGKILL, " -+ "status = %d", status); - - return(arg.pid); - } -@@ -138,7 +141,7 @@ - - os_stop_process(os_getpid()); - -- if(read(fd, &c, sizeof(c)) != sizeof(c)) -+ if(os_read_file(fd, &c, sizeof(c)) != sizeof(c)) - panic("read failed in suspend_new_thread"); - } - -@@ -168,7 +171,7 @@ - pid = clone(ptrace_child, (void *) sp, SIGCHLD, NULL); - if(pid < 0) - panic("check_ptrace : clone failed, errno = %d", errno); -- n = waitpid(pid, &status, WUNTRACED); -+ CATCH_EINTR(n = waitpid(pid, &status, WUNTRACED)); - if(n < 0) - panic("check_ptrace : wait failed, errno = %d", errno); - if(!WIFSTOPPED(status) || (WSTOPSIG(status) != SIGSTOP)) -@@ -185,7 +188,7 @@ - - if(ptrace(PTRACE_CONT, pid, 0, 0) < 0) - panic("check_ptrace : ptrace failed, errno = %d", errno); -- n = waitpid(pid, &status, 0); -+ CATCH_EINTR(n = waitpid(pid, &status, 0)); - if(!WIFEXITED(status) || (WEXITSTATUS(status) != exitcode)) - panic("check_ptrace : child 
exited with status 0x%x", status); - -@@ -193,6 +196,22 @@ - panic("check_ptrace : munmap failed, errno = %d", errno); - } - -+static int force_sysemu_disabled = 0; -+ -+static int __init nosysemu_cmd_param(char *str, int* add) -+{ -+ force_sysemu_disabled = 1; -+ return 0; -+} -+ -+__uml_setup("nosysemu", nosysemu_cmd_param, -+ "nosysemu\n" -+ " Turns off syscall emulation patch for ptrace (SYSEMU) on.\n" -+ " SYSEMU is a performance-patch introduced by Laurent Vivier. It changes\n" -+ " behaviour of ptrace() and helps reducing host context switch rate.\n" -+ " To make it working, you need a kernel patch for your host, too.\n" -+ " See http://perso.wanadoo.fr/laurent.vivier/UML/ for further information.\n"); -+ - void __init check_ptrace(void) - { - void *stack; -@@ -205,7 +224,7 @@ - if(ptrace(PTRACE_SYSCALL, pid, 0, 0) < 0) - panic("check_ptrace : ptrace failed, errno = %d", - errno); -- n = waitpid(pid, &status, WUNTRACED); -+ CATCH_EINTR(n = waitpid(pid, &status, WUNTRACED)); - if(n < 0) - panic("check_ptrace : wait failed, errno = %d", errno); - if(!WIFSTOPPED(status) || (WSTOPSIG(status) != SIGTRAP)) -@@ -225,6 +244,45 @@ - } - stop_ptraced_child(pid, stack, 0); - printk("OK\n"); -+ -+#ifdef UML_CONFIG_MODE_SKAS -+ printk("Checking syscall emulation patch for ptrace..."); -+ set_using_sysemu(0); -+ pid = start_ptraced_child(&stack); -+ if(ptrace(PTRACE_SYSEMU, pid, 0, 0) >= 0) { -+ struct user_regs_struct regs; -+ -+ CATCH_EINTR(n = waitpid(pid, &status, WUNTRACED)); -+ if (n < 0) -+ panic("check_ptrace : wait failed, errno = %d", errno); -+ if(!WIFSTOPPED(status) || (WSTOPSIG(status) != SIGTRAP)) -+ panic("check_ptrace : expected SIGTRAP, " -+ "got status = %d", status); -+ -+ if (ptrace(PTRACE_GETREGS, pid, 0, ®s) < 0) -+ panic("check_ptrace : failed to read child " -+ "registers, errno = %d", errno); -+ regs.orig_eax = pid; -+ if (ptrace(PTRACE_SETREGS, pid, 0, ®s) < 0) -+ panic("check_ptrace : failed to modify child " -+ "registers, errno = %d", errno); 
-+ -+ stop_ptraced_child(pid, stack, 0); -+ -+ if (!force_sysemu_disabled) { -+ printk("found\n"); -+ set_using_sysemu(1); -+ } else { -+ printk("found but disabled\n"); -+ } -+ } -+ else -+ { -+ printk("missing\n"); -+ stop_ptraced_child(pid, stack, 1); -+ } -+#endif -+ - } - - int run_kernel_thread(int (*fn)(void *), void *arg, void **jmp_ptr) -@@ -233,7 +289,7 @@ - int n; - - *jmp_ptr = &buf; -- n = setjmp(buf); -+ n = sigsetjmp(buf, 1); - if(n != 0) - return(n); - (*fn)(arg); -@@ -273,7 +329,7 @@ - stop_ptraced_child(pid, stack, 1); - - printf("Checking for /proc/mm..."); -- if(access("/proc/mm", W_OK)){ -+ if(os_access("/proc/mm", OS_ACC_W_OK) < 0){ - printf("not found\n"); - ret = 0; - } -Index: uml-2.6.7/arch/um/drivers/stdio_console.c -=================================================================== ---- uml-2.6.7.orig/arch/um/drivers/stdio_console.c 2004-07-16 19:36:13.845041696 +0300 -+++ uml-2.6.7/arch/um/drivers/stdio_console.c 2004-07-16 19:47:23.694209144 +0300 -@@ -83,7 +83,8 @@ - - static struct line_driver driver = { - .name = "UML console", -- .devfs_name = "vc/%d", -+ .device_name = "tty", -+ .devfs_name = "vc/", - .major = TTY_MAJOR, - .minor_start = 0, - .type = TTY_DRIVER_TYPE_CONSOLE, -@@ -159,6 +160,15 @@ - - static int con_init_done = 0; - -+static struct tty_operations console_ops = { -+ .open = con_open, -+ .close = con_close, -+ .write = con_write, -+ .chars_in_buffer = chars_in_buffer, -+ .set_termios = set_termios, -+ .write_room = line_write_room, -+}; -+ - int stdio_init(void) - { - char *new_title; -@@ -166,7 +176,8 @@ - printk(KERN_INFO "Initializing stdio console driver\n"); - - console_driver = line_register_devfs(&console_lines, &driver, -- &console_ops, vts, sizeof(vts)/sizeof(vts[0])); -+ &console_ops, vts, -+ sizeof(vts)/sizeof(vts[0])); - - lines_init(vts, sizeof(vts)/sizeof(vts[0])); - -@@ -178,24 +189,19 @@ - return(0); - } - --__initcall(stdio_init); -+late_initcall(stdio_init); - - static void console_write(struct 
console *console, const char *string, - unsigned len) - { -- if(con_init_done) down(&vts[console->index].sem); -- console_write_chan(&vts[console->index].chan_list, string, len); -- if(con_init_done) up(&vts[console->index].sem); --} -+ struct line *line = &vts[console->index]; - --static struct tty_operations console_ops = { -- .open = con_open, -- .close = con_close, -- .write = con_write, -- .chars_in_buffer = chars_in_buffer, -- .set_termios = set_termios, -- .write_room = line_write_room, --}; -+ if(con_init_done) -+ down(&line->sem); -+ console_write_chan(&line->chan_list, string, len); -+ if(con_init_done) -+ up(&line->sem); -+} - - static struct tty_driver *console_device(struct console *c, int *index) - { -@@ -208,22 +214,28 @@ - return(0); - } - --static struct console stdiocons = INIT_CONSOLE("tty", console_write, -- console_device, console_setup, -- CON_PRINTBUFFER); -+static struct console stdiocons = { -+ name: "tty", -+ write: console_write, -+ device: console_device, -+ setup: console_setup, -+ flags: CON_PRINTBUFFER, -+ index: -1, -+}; - --static void __init stdio_console_init(void) -+static int __init stdio_console_init(void) - { - INIT_LIST_HEAD(&vts[0].chan_list); - list_add(&init_console_chan.list, &vts[0].chan_list); - register_console(&stdiocons); -+ return(0); - } -+ - console_initcall(stdio_console_init); - - static int console_chan_setup(char *str) - { -- line_setup(vts, sizeof(vts)/sizeof(vts[0]), str, 1); -- return(1); -+ return(line_setup(vts, sizeof(vts)/sizeof(vts[0]), str, 1)); - } - - __setup("con", console_chan_setup); -Index: uml-2.6.7/arch/um/os-Linux/Makefile -=================================================================== ---- uml-2.6.7.orig/arch/um/os-Linux/Makefile 2004-07-16 19:36:54.369880984 +0300 -+++ uml-2.6.7/arch/um/os-Linux/Makefile 2004-07-16 19:47:23.768197896 +0300 -@@ -3,13 +3,9 @@ - # Licensed under the GPL - # - --obj-y = file.o process.o tty.o drivers/ -+obj-y = file.o process.o tty.o user_syms.o drivers/ - 
- USER_OBJS := $(foreach file,file.o process.o tty.o,$(obj)/$(file)) - - $(USER_OBJS) : %.o: %.c - $(CC) $(CFLAGS_$(notdir $@)) $(USER_CFLAGS) -c -o $@ $< -- --clean : -- --archmrproper: -Index: uml-2.6.7/arch/um/kernel/skas/syscall_kern.c -=================================================================== ---- uml-2.6.7.orig/arch/um/kernel/skas/syscall_kern.c 2004-07-16 19:36:57.006480160 +0300 -+++ uml-2.6.7/arch/um/kernel/skas/syscall_kern.c 2004-07-16 19:47:23.732203368 +0300 -@@ -1,5 +1,5 @@ - /* -- * Copyright (C) 2002 Jeff Dike (jdike@karaya.com) -+ * Copyright (C) 2002 - 2003 Jeff Dike (jdike@addtoit.com) - * Licensed under the GPL - */ - -Index: uml-2.6.7/arch/um/drivers/pty.c -=================================================================== ---- uml-2.6.7.orig/arch/um/drivers/pty.c 2004-07-16 19:37:26.082060000 +0300 -+++ uml-2.6.7/arch/um/drivers/pty.c 2004-07-16 19:47:23.692209448 +0300 -@@ -7,12 +7,12 @@ - #include - #include - #include --#include - #include - #include "chan_user.h" - #include "user.h" - #include "user_util.h" - #include "kern_util.h" -+#include "os.h" - - struct pty_chan { - void (*announce)(char *dev_name, int dev); -@@ -26,7 +26,8 @@ - { - struct pty_chan *data; - -- if((data = um_kmalloc(sizeof(*data))) == NULL) return(NULL); -+ data = um_kmalloc(sizeof(*data)); -+ if(data == NULL) return(NULL); - *data = ((struct pty_chan) { .announce = opts->announce, - .dev = device, - .raw = opts->raw }); -@@ -39,7 +40,8 @@ - char *dev; - int fd; - -- if((fd = get_pty()) < 0){ -+ fd = get_pty(); -+ if(fd < 0){ - printk("open_pts : Failed to open pts\n"); - return(-errno); - } -@@ -57,29 +59,27 @@ - - int getmaster(char *line) - { -- struct stat stb; - char *pty, *bank, *cp; -- int master; -+ int master, err; - - pty = &line[strlen("/dev/ptyp")]; - for (bank = "pqrs"; *bank; bank++) { - line[strlen("/dev/pty")] = *bank; - *pty = '0'; -- if (stat(line, &stb) < 0) -+ if (os_stat_file(line, NULL) < 0) - break; - for (cp = "0123456789abcdef"; 
*cp; cp++) { - *pty = *cp; -- master = open(line, O_RDWR); -+ master = os_open_file(line, of_rdwr(OPENFLAGS()), 0); - if (master >= 0) { - char *tp = &line[strlen("/dev/")]; -- int ok; - - /* verify slave side is usable */ - *tp = 't'; -- ok = access(line, R_OK|W_OK) == 0; -+ err = os_access(line, OS_ACC_RW_OK); - *tp = 'p'; -- if (ok) return(master); -- (void) close(master); -+ if(err == 0) return(master); -+ (void) os_close_file(master); - } - } - } -Index: uml-2.6.7/arch/um/util/mk_constants_kern.c -=================================================================== ---- uml-2.6.7.orig/arch/um/util/mk_constants_kern.c 2004-07-16 19:35:59.020295400 +0300 -+++ uml-2.6.7/arch/um/util/mk_constants_kern.c 2004-07-16 19:47:23.781195920 +0300 -@@ -1,5 +1,6 @@ - #include "linux/kernel.h" - #include "linux/stringify.h" -+#include "linux/time.h" - #include "asm/page.h" - - extern void print_head(void); -@@ -11,6 +12,7 @@ - { - print_head(); - print_constant_int("UM_KERN_PAGE_SIZE", PAGE_SIZE); -+ - print_constant_str("UM_KERN_EMERG", KERN_EMERG); - print_constant_str("UM_KERN_ALERT", KERN_ALERT); - print_constant_str("UM_KERN_CRIT", KERN_CRIT); -@@ -19,6 +21,8 @@ - print_constant_str("UM_KERN_NOTICE", KERN_NOTICE); - print_constant_str("UM_KERN_INFO", KERN_INFO); - print_constant_str("UM_KERN_DEBUG", KERN_DEBUG); -+ -+ print_constant_int("UM_NSEC_PER_SEC", NSEC_PER_SEC); - print_tail(); - return(0); - } -Index: uml-2.6.7/arch/um/drivers/port_user.c -=================================================================== ---- uml-2.6.7.orig/arch/um/drivers/port_user.c 2004-07-16 19:36:57.124462224 +0300 -+++ uml-2.6.7/arch/um/drivers/port_user.c 2004-07-16 19:47:23.691209600 +0300 -@@ -47,10 +47,12 @@ - return(NULL); - } - -- if((kern_data = port_data(port)) == NULL) -+ kern_data = port_data(port); -+ if(kern_data == NULL) - return(NULL); - -- if((data = um_kmalloc(sizeof(*data))) == NULL) -+ data = um_kmalloc(sizeof(*data)); -+ if(data == NULL) - goto err; - - *data = 
((struct port_chan) { .raw = opts->raw, -@@ -90,7 +92,7 @@ - struct port_chan *data = d; - - port_remove_dev(data->kernel_data); -- close(fd); -+ os_close_file(fd); - } - - int port_console_write(int fd, const char *buf, int n, void *d) -@@ -130,11 +132,15 @@ - goto out; - } - -- if((listen(fd, 1) < 0) || (os_set_fd_block(fd, 0))){ -+ if(listen(fd, 1) < 0){ - err = -errno; - goto out; - } - -+ err = os_set_fd_block(fd, 0); -+ if(err < 0) -+ goto out; -+ - return(fd); - out: - os_close_file(fd); -@@ -153,10 +159,10 @@ - dup2(data->sock_fd, 0); - dup2(data->sock_fd, 1); - dup2(data->sock_fd, 2); -- close(data->sock_fd); -+ os_close_file(data->sock_fd); - dup2(data->pipe_fd, 3); - os_shutdown_socket(3, 1, 0); -- close(data->pipe_fd); -+ os_close_file(data->pipe_fd); - } - - int port_connection(int fd, int *socket, int *pid_out) -@@ -166,11 +172,12 @@ - "/usr/lib/uml/port-helper", NULL }; - struct port_pre_exec_data data; - -- if((new = os_accept_connection(fd)) < 0) -- return(-errno); -+ new = os_accept_connection(fd); -+ if(new < 0) -+ return(new); - - err = os_pipe(socket, 0, 0); -- if(err) -+ if(err < 0) - goto out_close; - - data = ((struct port_pre_exec_data) -@@ -186,11 +193,11 @@ - - out_shutdown: - os_shutdown_socket(socket[0], 1, 1); -- close(socket[0]); -+ os_close_file(socket[0]); - os_shutdown_socket(socket[1], 1, 1); -- close(socket[1]); -+ os_close_file(socket[1]); - out_close: -- close(new); -+ os_close_file(new); - return(err); - } - -Index: uml-2.6.7/arch/um/drivers/port_kern.c -=================================================================== ---- uml-2.6.7.orig/arch/um/drivers/port_kern.c 2004-07-16 19:35:56.025750640 +0300 -+++ uml-2.6.7/arch/um/drivers/port_kern.c 2004-07-16 19:47:23.690209752 +0300 -@@ -6,6 +6,7 @@ - #include "linux/list.h" - #include "linux/sched.h" - #include "linux/slab.h" -+#include "linux/interrupt.h" - #include "linux/irq.h" - #include "linux/spinlock.h" - #include "linux/errno.h" -@@ -14,6 +15,7 @@ - #include 
"kern_util.h" - #include "kern.h" - #include "irq_user.h" -+#include "irq_kern.h" - #include "port.h" - #include "init.h" - #include "os.h" -@@ -38,21 +40,21 @@ - struct connection { - struct list_head list; - int fd; -- int helper_pid; -+ int helper_pid; - int socket[2]; - int telnetd_pid; - struct port_list *port; - }; - --static void pipe_interrupt(int irq, void *data, struct pt_regs *regs) -+static irqreturn_t pipe_interrupt(int irq, void *data, struct pt_regs *regs) - { - struct connection *conn = data; - int fd; - -- fd = os_rcv_fd(conn->socket[0], &conn->helper_pid); -+ fd = os_rcv_fd(conn->socket[0], &conn->helper_pid); - if(fd < 0){ - if(fd == -EAGAIN) -- return; -+ return(IRQ_NONE); - - printk(KERN_ERR "pipe_interrupt : os_rcv_fd returned %d\n", - -fd); -@@ -65,6 +67,7 @@ - list_add(&conn->list, &conn->port->connections); - - up(&conn->port->sem); -+ return(IRQ_HANDLED); - } - - static int port_accept(struct port_list *port) -@@ -102,8 +105,7 @@ - } - - list_add(&conn->list, &port->pending); -- ret = 1; -- goto out; -+ return(1); - - out_free: - kfree(conn); -@@ -138,12 +140,13 @@ - - DECLARE_WORK(port_work, port_work_proc, NULL); - --static void port_interrupt(int irq, void *data, struct pt_regs *regs) -+static irqreturn_t port_interrupt(int irq, void *data, struct pt_regs *regs) - { - struct port_list *port = data; - - port->has_connection = 1; - schedule_work(&port_work); -+ return(IRQ_HANDLED); - } - - void *port_data(int port_num) -Index: uml-2.6.7/arch/um/sys-i386/ptrace_user.c -=================================================================== ---- uml-2.6.7.orig/arch/um/sys-i386/ptrace_user.c 2004-07-16 19:37:52.078108000 +0300 -+++ uml-2.6.7/arch/um/sys-i386/ptrace_user.c 2004-07-16 19:47:23.772197288 +0300 -@@ -39,10 +39,10 @@ - nregs = sizeof(dummy->u_debugreg)/sizeof(dummy->u_debugreg[0]); - for(i = 0; i < nregs; i++){ - if((i == 4) || (i == 5)) continue; -- if(ptrace(PTRACE_POKEUSR, pid, &dummy->u_debugreg[i], -+ if(ptrace(PTRACE_POKEUSER, 
pid, &dummy->u_debugreg[i], - regs[i]) < 0) -- printk("write_debugregs - ptrace failed, " -- "errno = %d\n", errno); -+ printk("write_debugregs - ptrace failed on " -+ "register %d, errno = %d\n", errno); - } - } - -@@ -54,7 +54,7 @@ - dummy = NULL; - nregs = sizeof(dummy->u_debugreg)/sizeof(dummy->u_debugreg[0]); - for(i = 0; i < nregs; i++){ -- regs[i] = ptrace(PTRACE_PEEKUSR, pid, -+ regs[i] = ptrace(PTRACE_PEEKUSER, pid, - &dummy->u_debugreg[i], 0); - } - } -Index: uml-2.6.7/arch/um/os-Linux/drivers/tuntap_user.c -=================================================================== ---- uml-2.6.7.orig/arch/um/os-Linux/drivers/tuntap_user.c 2004-07-16 19:37:46.493956920 +0300 -+++ uml-2.6.7/arch/um/os-Linux/drivers/tuntap_user.c 2004-07-16 19:47:24.802040728 +0300 -@@ -8,7 +8,6 @@ - #include - #include - #include --#include - #include - #include - #include -@@ -19,6 +18,7 @@ - #include "net_user.h" - #include "tuntap.h" - #include "kern_util.h" -+#include "user_util.h" - #include "user.h" - #include "helper.h" - #include "os.h" -@@ -61,7 +61,7 @@ - struct tuntap_pre_exec_data *data = arg; - - dup2(data->stdout, 1); -- close(data->close_me); -+ os_close_file(data->close_me); - } - - static int tuntap_open_tramp(char *gate, int *fd_out, int me, int remote, -@@ -86,7 +86,7 @@ - - if(pid < 0) return(-pid); - -- close(remote); -+ os_close_file(remote); - - msg.msg_name = NULL; - msg.msg_namelen = 0; -@@ -107,19 +107,19 @@ - if(n < 0){ - printk("tuntap_open_tramp : recvmsg failed - errno = %d\n", - errno); -- return(errno); -+ return(-errno); - } -- waitpid(pid, NULL, 0); -+ CATCH_EINTR(waitpid(pid, NULL, 0)); - - cmsg = CMSG_FIRSTHDR(&msg); - if(cmsg == NULL){ - printk("tuntap_open_tramp : didn't receive a message\n"); -- return(EINVAL); -+ return(-EINVAL); - } - if((cmsg->cmsg_level != SOL_SOCKET) || - (cmsg->cmsg_type != SCM_RIGHTS)){ - printk("tuntap_open_tramp : didn't receive a descriptor\n"); -- return(EINVAL); -+ return(-EINVAL); - } - *fd_out = ((int *) 
CMSG_DATA(cmsg))[0]; - return(0); -@@ -133,27 +133,29 @@ - int err, fds[2], len, used; - - err = tap_open_common(pri->dev, pri->gate_addr); -- if(err) return(err); -+ if(err < 0) -+ return(err); - - if(pri->fixed_config){ -- if((pri->fd = open("/dev/net/tun", O_RDWR)) < 0){ -- printk("Failed to open /dev/net/tun, errno = %d\n", -- errno); -- return(-errno); -+ pri->fd = os_open_file("/dev/net/tun", of_rdwr(OPENFLAGS()), 0); -+ if(pri->fd < 0){ -+ printk("Failed to open /dev/net/tun, err = %d\n", -+ -pri->fd); -+ return(pri->fd); - } - memset(&ifr, 0, sizeof(ifr)); -- ifr.ifr_flags = IFF_TAP; -+ ifr.ifr_flags = IFF_TAP | IFF_NO_PI; - strlcpy(ifr.ifr_name, pri->dev_name, sizeof(ifr.ifr_name)); - if(ioctl(pri->fd, TUNSETIFF, (void *) &ifr) < 0){ -- printk("TUNSETIFF failed, errno = %d", errno); -- close(pri->fd); -+ printk("TUNSETIFF failed, errno = %d\n", errno); -+ os_close_file(pri->fd); - return(-errno); - } - } - else { - err = os_pipe(fds, 0, 0); -- if(err){ -- printk("tuntap_open : os_pipe failed - errno = %d\n", -+ if(err < 0){ -+ printk("tuntap_open : os_pipe failed - err = %d\n", - -err); - return(err); - } -@@ -166,19 +168,19 @@ - fds[1], buffer, len, &used); - - output = buffer; -- if(err == 0){ -- pri->dev_name = uml_strdup(buffer); -- output += IFNAMSIZ; -- printk(output); -- free_output_buffer(buffer); -- } -- else { -- printk(output); -+ if(err < 0) { -+ printk("%s", output); - free_output_buffer(buffer); -- printk("tuntap_open_tramp failed - errno = %d\n", err); -- return(-err); -+ printk("tuntap_open_tramp failed - err = %d\n", -err); -+ return(err); - } -- close(fds[0]); -+ -+ pri->dev_name = uml_strdup(buffer); -+ output += IFNAMSIZ; -+ printk("%s", output); -+ free_output_buffer(buffer); -+ -+ os_close_file(fds[0]); - iter_addresses(pri->dev, open_addr, pri->dev_name); - } - -@@ -191,7 +193,7 @@ - - if(!pri->fixed_config) - iter_addresses(pri->dev, close_addr, pri->dev_name); -- close(fd); -+ os_close_file(fd); - pri->fd = -1; - } - -Index: 
uml-2.6.7/arch/um/kernel/um_arch.c -=================================================================== ---- uml-2.6.7.orig/arch/um/kernel/um_arch.c 2004-07-16 19:37:17.463370240 +0300 -+++ uml-2.6.7/arch/um/kernel/um_arch.c 2004-07-16 19:47:23.755199872 +0300 -@@ -38,13 +38,18 @@ - #include "mode_kern.h" - #include "mode.h" - --#define DEFAULT_COMMAND_LINE "root=6200" -+#define DEFAULT_COMMAND_LINE "root=98:0" - - struct cpuinfo_um boot_cpu_data = { - .loops_per_jiffy = 0, - .ipi_pipe = { -1, -1 } - }; - -+/* Placeholder to make UML link until the vsyscall stuff is actually -+ * implemented -+ */ -+void *__kernel_vsyscall; -+ - unsigned long thread_saved_pc(struct task_struct *task) - { - return(os_process_pc(CHOOSE_MODE_PROC(thread_pid_tt, thread_pid_skas, -@@ -53,18 +58,22 @@ - - static int show_cpuinfo(struct seq_file *m, void *v) - { -- int index; -+ int index = 0; - -- index = (struct cpuinfo_um *)v - cpu_data; - #ifdef CONFIG_SMP -+ index = (struct cpuinfo_um *) v - cpu_data; - if (!cpu_online(index)) - return 0; - #endif - -- seq_printf(m, "bogomips\t: %lu.%02lu\n", -+ seq_printf(m, "processor\t: %d\n", index); -+ seq_printf(m, "vendor_id\t: User Mode Linux\n"); -+ seq_printf(m, "model name\t: UML\n"); -+ seq_printf(m, "mode\t\t: %s\n", CHOOSE_MODE("tt", "skas")); -+ seq_printf(m, "host\t\t: %s\n", host_info); -+ seq_printf(m, "bogomips\t: %lu.%02lu\n\n", - loops_per_jiffy/(500000/HZ), - (loops_per_jiffy/(5000/HZ)) % 100); -- seq_printf(m, "host\t\t: %s\n", host_info); - - return(0); - } -@@ -134,12 +143,12 @@ - if(umid != NULL){ - snprintf(argv1_begin, - (argv1_end - argv1_begin) * sizeof(*ptr), -- "(%s)", umid); -+ "(%s) ", umid); - ptr = &argv1_begin[strlen(argv1_begin)]; - } - else ptr = argv1_begin; - -- snprintf(ptr, (argv1_end - ptr) * sizeof(*ptr), " [%s]", cmd); -+ snprintf(ptr, (argv1_end - ptr) * sizeof(*ptr), "[%s]", cmd); - memset(argv1_begin + strlen(argv1_begin), '\0', - argv1_end - argv1_begin - strlen(argv1_begin)); - #endif -@@ -179,7 
+188,7 @@ - static int __init uml_ncpus_setup(char *line, int *add) - { - if (!sscanf(line, "%d", &ncpus)) { -- printk("Couldn't parse [%s]\n", line); -+ printf("Couldn't parse [%s]\n", line); - return -1; - } - -@@ -210,7 +219,7 @@ - - static int __init mode_tt_setup(char *line, int *add) - { -- printk("CONFIG_MODE_TT disabled - 'mode=tt' ignored\n"); -+ printf("CONFIG_MODE_TT disabled - 'mode=tt' ignored\n"); - return(0); - } - -@@ -221,7 +230,7 @@ - - static int __init mode_tt_setup(char *line, int *add) - { -- printk("CONFIG_MODE_SKAS disabled - 'mode=tt' redundant\n"); -+ printf("CONFIG_MODE_SKAS disabled - 'mode=tt' redundant\n"); - return(0); - } - -@@ -291,7 +300,7 @@ - - /* Set during early boot */ - unsigned long brk_start; --static struct vm_reserved kernel_vm_reserved; -+unsigned long end_iomem; - - #define MIN_VMALLOC (32 * 1024 * 1024) - -@@ -299,7 +308,7 @@ - { - unsigned long avail; - unsigned long virtmem_size, max_physmem; -- unsigned int i, add, err; -+ unsigned int i, add; - - for (i = 1; i < argc; i++){ - if((i == 1) && (argv[i][0] == ' ')) continue; -@@ -328,12 +337,16 @@ - argv1_end = &argv[1][strlen(argv[1])]; - #endif - -- set_usable_vm(uml_physmem, get_kmem_end()); -- - highmem = 0; -- max_physmem = get_kmem_end() - uml_physmem - MIN_VMALLOC; -- if(physmem_size > max_physmem){ -- highmem = physmem_size - max_physmem; -+ iomem_size = (iomem_size + PAGE_SIZE - 1) & PAGE_MASK; -+ max_physmem = get_kmem_end() - uml_physmem - iomem_size - MIN_VMALLOC; -+ -+ /* Zones have to begin on a 1 << MAX_ORDER page boundary, -+ * so this makes sure that's true for highmem -+ */ -+ max_physmem &= ~((1 << (PAGE_SHIFT + MAX_ORDER)) - 1); -+ if(physmem_size + iomem_size > max_physmem){ -+ highmem = physmem_size + iomem_size - max_physmem; - physmem_size -= highmem; - #ifndef CONFIG_HIGHMEM - highmem = 0; -@@ -343,11 +356,19 @@ - } - - high_physmem = uml_physmem + physmem_size; -- high_memory = (void *) high_physmem; -+ end_iomem = high_physmem + iomem_size; 
-+ high_memory = (void *) end_iomem; - - start_vm = VMALLOC_START; - -- setup_physmem(uml_physmem, uml_reserved, physmem_size); -+ setup_physmem(uml_physmem, uml_reserved, physmem_size, highmem); -+ if(init_maps(physmem_size, iomem_size, highmem)){ -+ printf("Failed to allocate mem_map for %ld bytes of physical " -+ "memory and %ld bytes of highmem\n", physmem_size, -+ highmem); -+ exit(1); -+ } -+ - virtmem_size = physmem_size; - avail = get_kmem_end() - start_vm; - if(physmem_size > avail) virtmem_size = avail; -@@ -357,28 +378,26 @@ - printf("Kernel virtual memory size shrunk to %ld bytes\n", - virtmem_size); - -- err = reserve_vm(high_physmem, end_vm, &kernel_vm_reserved); -- if(err){ -- printf("Failed to reserve VM area for kernel VM\n"); -- exit(1); -- } -- - uml_postsetup(); - - init_task.thread.kernel_stack = (unsigned long) &init_thread_info + - 2 * PAGE_SIZE; - - task_protections((unsigned long) &init_thread_info); -+ os_flush_stdout(); - - return(CHOOSE_MODE(start_uml_tt(), start_uml_skas())); - } - -+extern int uml_exitcode; -+ - static int panic_exit(struct notifier_block *self, unsigned long unused1, - void *unused2) - { - #ifdef CONFIG_MAGIC_SYSRQ -- handle_sysrq('p', ¤t->thread.regs, NULL, NULL); -+ handle_sysrq('p', ¤t->thread.regs, NULL); - #endif -+ uml_exitcode = 1; - machine_halt(); - return(0); - } -@@ -403,6 +422,11 @@ - arch_check_bugs(); - check_ptrace(); - check_sigio(); -+ check_devanon(); -+} -+ -+void apply_alternatives(void *start, void *end) -+{ - } - - /* -Index: uml-2.6.7/arch/um/kernel/mem.c -=================================================================== ---- uml-2.6.7.orig/arch/um/kernel/mem.c 2004-07-16 19:36:30.355531720 +0300 -+++ uml-2.6.7/arch/um/kernel/mem.c 2004-07-16 19:47:23.720205192 +0300 -@@ -1,74 +1,66 @@ - /* -- * Copyright (C) 2000, 2001, 2002 Jeff Dike (jdike@karaya.com) -+ * Copyright (C) 2000 - 2003 Jeff Dike (jdike@addtoit.com) - * Licensed under the GPL - */ - --#include "linux/config.h" --#include 
"linux/module.h" --#include "linux/types.h" -+#include "linux/stddef.h" -+#include "linux/kernel.h" - #include "linux/mm.h" --#include "linux/fs.h" --#include "linux/init.h" - #include "linux/bootmem.h" - #include "linux/swap.h" --#include "linux/slab.h" --#include "linux/vmalloc.h" - #include "linux/highmem.h" -+#include "linux/gfp.h" - #include "asm/page.h" --#include "asm/pgtable.h" -+#include "asm/fixmap.h" - #include "asm/pgalloc.h" --#include "asm/bitops.h" --#include "asm/uaccess.h" --#include "asm/tlb.h" - #include "user_util.h" - #include "kern_util.h" --#include "mem_user.h" --#include "mem.h" - #include "kern.h" --#include "init.h" --#include "os.h" --#include "mode_kern.h" -+#include "mem_user.h" - #include "uml_uaccess.h" -+#include "os.h" -+ -+extern char __binary_start; - - /* Changed during early boot */ --pgd_t swapper_pg_dir[1024]; --unsigned long high_physmem; --unsigned long vm_start; --unsigned long vm_end; --unsigned long highmem; - unsigned long *empty_zero_page = NULL; - unsigned long *empty_bad_page = NULL; -- --/* Not modified */ --const char bad_pmd_string[] = "Bad pmd in pte_alloc: %08lx\n"; -- --extern char __init_begin, __init_end; --extern long physmem_size; -- --/* Not changed by UML */ --DEFINE_PER_CPU(struct mmu_gather, mmu_gathers); -- --/* Changed during early boot */ -+pgd_t swapper_pg_dir[1024]; -+unsigned long highmem; - int kmalloc_ok = 0; - --#define NREGIONS (phys_region_index(0xffffffff) - phys_region_index(0x0) + 1) --struct mem_region *regions[NREGIONS] = { [ 0 ... 
NREGIONS - 1 ] = NULL }; --#define REGION_SIZE ((0xffffffff & ~REGION_MASK) + 1) -- --/* Changed during early boot */ - static unsigned long brk_end; - -+void unmap_physmem(void) -+{ -+ os_unmap_memory((void *) brk_end, uml_reserved - brk_end); -+} -+ - static void map_cb(void *unused) - { - map_memory(brk_end, __pa(brk_end), uml_reserved - brk_end, 1, 1, 0); - } - --void unmap_physmem(void) -+#ifdef CONFIG_HIGHMEM -+static void setup_highmem(unsigned long highmem_start, -+ unsigned long highmem_len) - { -- os_unmap_memory((void *) brk_end, uml_reserved - brk_end); --} -+ struct page *page; -+ unsigned long highmem_pfn; -+ int i; - --extern char __binary_start; -+ highmem_start_page = virt_to_page(highmem_start); -+ -+ highmem_pfn = __pa(highmem_start) >> PAGE_SHIFT; -+ for(i = 0; i < highmem_len >> PAGE_SHIFT; i++){ -+ page = &mem_map[highmem_pfn + i]; -+ ClearPageReserved(page); -+ set_bit(PG_highmem, &page->flags); -+ set_page_count(page, 1); -+ __free_page(page); -+ } -+} -+#endif - - void mem_init(void) - { -@@ -103,50 +95,15 @@ - totalhigh_pages = highmem >> PAGE_SHIFT; - totalram_pages += totalhigh_pages; - num_physpages = totalram_pages; -- max_mapnr = totalram_pages; - max_pfn = totalram_pages; - printk(KERN_INFO "Memory: %luk available\n", - (unsigned long) nr_free_pages() << (PAGE_SHIFT-10)); - kmalloc_ok = 1; --} -- --/* Changed during early boot */ --static unsigned long kmem_top = 0; -- --unsigned long get_kmem_end(void) --{ -- if(kmem_top == 0) -- kmem_top = CHOOSE_MODE(kmem_end_tt, kmem_end_skas); -- return(kmem_top); --} -- --void set_kmem_end(unsigned long new) --{ -- kmem_top = new; --} - - #ifdef CONFIG_HIGHMEM --/* Changed during early boot */ --pte_t *kmap_pte; --pgprot_t kmap_prot; -- --EXPORT_SYMBOL(kmap_prot); --EXPORT_SYMBOL(kmap_pte); -- --#define kmap_get_fixmap_pte(vaddr) \ -- pte_offset_kernel(pmd_offset(pgd_offset_k(vaddr), (vaddr)), (vaddr)) -- --void __init kmap_init(void) --{ -- unsigned long kmap_vstart; -- -- /* cache the first 
kmap pte */ -- kmap_vstart = __fix_to_virt(FIX_KMAP_BEGIN); -- kmap_pte = kmap_get_fixmap_pte(kmap_vstart); -- -- kmap_prot = PAGE_KERNEL; -+ setup_highmem(end_iomem, highmem); -+#endif - } --#endif /* CONFIG_HIGHMEM */ - - static void __init fixrange_init(unsigned long start, unsigned long end, - pgd_t *pgd_base) -@@ -178,76 +135,24 @@ - } - } - --int init_maps(struct mem_region *region) --{ -- struct page *p, *map; -- int i, n, len; -- -- if(region == &physmem_region){ -- region->mem_map = mem_map; -- return(0); -- } -- else if(region->mem_map != NULL) return(0); -- -- n = region->len >> PAGE_SHIFT; -- len = n * sizeof(struct page); -- if(kmalloc_ok){ -- map = kmalloc(len, GFP_KERNEL); -- if(map == NULL) map = vmalloc(len); -- } -- else map = alloc_bootmem_low_pages(len); -- -- if(map == NULL) -- return(-ENOMEM); -- for(i = 0; i < n; i++){ -- p = &map[i]; -- set_page_count(p, 0); -- SetPageReserved(p); -- INIT_LIST_HEAD(&p->list); -- } -- region->mem_map = map; -- return(0); --} -+#if CONFIG_HIGHMEM -+pte_t *kmap_pte; -+pgprot_t kmap_prot; - --DECLARE_MUTEX(regions_sem); -+#define kmap_get_fixmap_pte(vaddr) \ -+ pte_offset(pmd_offset(pgd_offset_k(vaddr), (vaddr)), (vaddr)) - --static int setup_one_range(int fd, char *driver, unsigned long start, -- unsigned long pfn, int len, -- struct mem_region *region) -+void __init kmap_init(void) - { -- int i; -- -- down(®ions_sem); -- for(i = 0; i < NREGIONS; i++){ -- if(regions[i] == NULL) break; -- } -- if(i == NREGIONS){ -- printk("setup_range : no free regions\n"); -- i = -1; -- goto out; -- } -- -- if(fd == -1) -- fd = create_mem_file(len); -+ unsigned long kmap_vstart; - -- if(region == NULL){ -- region = alloc_bootmem_low_pages(sizeof(*region)); -- if(region == NULL) -- panic("Failed to allocating mem_region"); -- } -+ /* cache the first kmap pte */ -+ kmap_vstart = __fix_to_virt(FIX_KMAP_BEGIN); -+ kmap_pte = kmap_get_fixmap_pte(kmap_vstart); - -- *region = ((struct mem_region) { .driver = driver, -- .start_pfn = 
pfn, -- .start = start, -- .len = len, -- .fd = fd } ); -- regions[i] = region; -- out: -- up(®ions_sem); -- return(i); -+ kmap_prot = PAGE_KERNEL; - } - --#ifdef CONFIG_HIGHMEM - static void init_highmem(void) - { - pgd_t *pgd; -@@ -268,63 +173,20 @@ - - kmap_init(); - } -- --void setup_highmem(unsigned long len) --{ -- struct mem_region *region; -- struct page *page, *map; -- unsigned long phys; -- int i, cur, index; -- -- phys = physmem_size; -- do { -- cur = min(len, (unsigned long) REGION_SIZE); -- i = setup_one_range(-1, NULL, -1, phys >> PAGE_SHIFT, cur, -- NULL); -- if(i == -1){ -- printk("setup_highmem - setup_one_range failed\n"); -- return; -- } -- region = regions[i]; -- index = phys / PAGE_SIZE; -- region->mem_map = &mem_map[index]; -- -- map = region->mem_map; -- for(i = 0; i < (cur >> PAGE_SHIFT); i++){ -- page = &map[i]; -- ClearPageReserved(page); -- set_bit(PG_highmem, &page->flags); -- set_page_count(page, 1); -- __free_page(page); -- } -- phys += cur; -- len -= cur; -- } while(len > 0); --} --#endif -+#endif /* CONFIG_HIGHMEM */ - - void paging_init(void) - { -- struct mem_region *region; -- unsigned long zones_size[MAX_NR_ZONES], start, end, vaddr; -- int i, index; -+ unsigned long zones_size[MAX_NR_ZONES], vaddr; -+ int i; - - empty_zero_page = (unsigned long *) alloc_bootmem_low_pages(PAGE_SIZE); - empty_bad_page = (unsigned long *) alloc_bootmem_low_pages(PAGE_SIZE); - for(i=0;i> PAGE_SHIFT) - -- (uml_physmem >> PAGE_SHIFT); -+ zones_size[0] = (end_iomem >> PAGE_SHIFT) - (uml_physmem >> PAGE_SHIFT); - zones_size[2] = highmem >> PAGE_SHIFT; - free_area_init(zones_size); -- start = phys_region_index(__pa(uml_physmem)); -- end = phys_region_index(__pa(high_physmem - 1)); -- for(i = start; i <= end; i++){ -- region = regions[i]; -- index = (region->start - uml_physmem) / PAGE_SIZE; -- region->mem_map = &mem_map[index]; -- if(i > start) free_bootmem(__pa(region->start), region->len); -- } - - /* - * Fixed mappings, only the page table structure 
has to be -@@ -335,15 +197,33 @@ - - #ifdef CONFIG_HIGHMEM - init_highmem(); -- setup_highmem(highmem); - #endif - } - --pte_t __bad_page(void) -+struct page *arch_validate(struct page *page, int mask, int order) - { -- clear_page(empty_bad_page); -- return pte_mkdirty(mk_pte((struct page *) empty_bad_page, -- PAGE_SHARED)); -+ unsigned long addr, zero = 0; -+ int i; -+ -+ again: -+ if(page == NULL) return(page); -+ if(PageHighMem(page)) return(page); -+ -+ addr = (unsigned long) page_address(page); -+ for(i = 0; i < (1 << order); i++){ -+ current->thread.fault_addr = (void *) addr; -+ if(__do_copy_to_user((void *) addr, &zero, -+ sizeof(zero), -+ ¤t->thread.fault_addr, -+ ¤t->thread.fault_catcher)){ -+ if(!(mask & __GFP_WAIT)) return(NULL); -+ else break; -+ } -+ addr += PAGE_SIZE; -+ } -+ if(i == (1 << order)) return(page); -+ page = alloc_pages(mask, order); -+ goto again; - } - - /* This can't do anything because nothing in the kernel image can be freed -@@ -401,395 +281,6 @@ - printk("%d pages swap cached\n", cached); - } - --static int __init uml_mem_setup(char *line, int *add) --{ -- char *retptr; -- physmem_size = memparse(line,&retptr); -- return 0; --} --__uml_setup("mem=", uml_mem_setup, --"mem=\n" --" This controls how much \"physical\" memory the kernel allocates\n" --" for the system. The size is specified as a number followed by\n" --" one of 'k', 'K', 'm', 'M', which have the obvious meanings.\n" --" This is not related to the amount of memory in the physical\n" --" machine. 
It can be more, and the excess, if it's ever used, will\n" --" just be swapped out.\n Example: mem=64M\n\n" --); -- --struct page *arch_validate(struct page *page, int mask, int order) --{ -- unsigned long addr, zero = 0; -- int i; -- -- again: -- if(page == NULL) return(page); -- if(PageHighMem(page)) return(page); -- -- addr = (unsigned long) page_address(page); -- for(i = 0; i < (1 << order); i++){ -- current->thread.fault_addr = (void *) addr; -- if(__do_copy_to_user((void *) addr, &zero, -- sizeof(zero), -- ¤t->thread.fault_addr, -- ¤t->thread.fault_catcher)){ -- if(!(mask & __GFP_WAIT)) return(NULL); -- else break; -- } -- addr += PAGE_SIZE; -- } -- if(i == (1 << order)) return(page); -- page = alloc_pages(mask, order); -- goto again; --} -- --DECLARE_MUTEX(vm_reserved_sem); --static struct list_head vm_reserved = LIST_HEAD_INIT(vm_reserved); -- --/* Static structures, linked in to the list in early boot */ --static struct vm_reserved head = { -- .list = LIST_HEAD_INIT(head.list), -- .start = 0, -- .end = 0xffffffff --}; -- --static struct vm_reserved tail = { -- .list = LIST_HEAD_INIT(tail.list), -- .start = 0, -- .end = 0xffffffff --}; -- --void set_usable_vm(unsigned long start, unsigned long end) --{ -- list_add(&head.list, &vm_reserved); -- list_add(&tail.list, &head.list); -- head.end = start; -- tail.start = end; --} -- --int reserve_vm(unsigned long start, unsigned long end, void *e) -- --{ -- struct vm_reserved *entry = e, *reserved, *prev; -- struct list_head *ele; -- int err; -- -- down(&vm_reserved_sem); -- list_for_each(ele, &vm_reserved){ -- reserved = list_entry(ele, struct vm_reserved, list); -- if(reserved->start >= end) goto found; -- } -- panic("Reserved vm out of range"); -- found: -- prev = list_entry(ele->prev, struct vm_reserved, list); -- if(prev->end > start) -- panic("Can't reserve vm"); -- if(entry == NULL) -- entry = kmalloc(sizeof(*entry), GFP_KERNEL); -- if(entry == NULL){ -- printk("reserve_vm : Failed to allocate entry\n"); -- 
err = -ENOMEM; -- goto out; -- } -- *entry = ((struct vm_reserved) -- { .list = LIST_HEAD_INIT(entry->list), -- .start = start, -- .end = end }); -- list_add(&entry->list, &prev->list); -- err = 0; -- out: -- up(&vm_reserved_sem); -- return(0); --} -- --unsigned long get_vm(unsigned long len) --{ -- struct vm_reserved *this, *next; -- struct list_head *ele; -- unsigned long start; -- int err; -- -- down(&vm_reserved_sem); -- list_for_each(ele, &vm_reserved){ -- this = list_entry(ele, struct vm_reserved, list); -- next = list_entry(ele->next, struct vm_reserved, list); -- if((this->start < next->start) && -- (this->end + len + PAGE_SIZE <= next->start)) -- goto found; -- } -- up(&vm_reserved_sem); -- return(0); -- found: -- up(&vm_reserved_sem); -- start = (unsigned long) UML_ROUND_UP(this->end) + PAGE_SIZE; -- err = reserve_vm(start, start + len, NULL); -- if(err) return(0); -- return(start); --} -- --int nregions(void) --{ -- return(NREGIONS); --} -- --void setup_range(int fd, char *driver, unsigned long start, unsigned long pfn, -- unsigned long len, int need_vm, struct mem_region *region, -- void *reserved) --{ -- int i, cur; -- -- do { -- cur = min(len, (unsigned long) REGION_SIZE); -- i = setup_one_range(fd, driver, start, pfn, cur, region); -- region = regions[i]; -- if(need_vm && setup_region(region, reserved)){ -- kfree(region); -- regions[i] = NULL; -- return; -- } -- start += cur; -- if(pfn != -1) pfn += cur; -- len -= cur; -- } while(len > 0); --} -- --struct iomem { -- char *name; -- int fd; -- unsigned long size; --}; -- --/* iomem regions can only be added on the command line at the moment. -- * Locking will be needed when they can be added via mconsole. -- */ -- --struct iomem iomem_regions[NREGIONS] = { [ 0 ... 
NREGIONS - 1 ] = -- { .name = NULL, -- .fd = -1, -- .size = 0 } }; -- --int num_iomem_regions = 0; -- --void add_iomem(char *name, int fd, unsigned long size) --{ -- if(num_iomem_regions == sizeof(iomem_regions)/sizeof(iomem_regions[0])) -- return; -- size = (size + PAGE_SIZE - 1) & PAGE_MASK; -- iomem_regions[num_iomem_regions++] = -- ((struct iomem) { .name = name, -- .fd = fd, -- .size = size } ); --} -- --int setup_iomem(void) --{ -- struct iomem *iomem; -- int i; -- -- for(i = 0; i < num_iomem_regions; i++){ -- iomem = &iomem_regions[i]; -- setup_range(iomem->fd, iomem->name, -1, -1, iomem->size, 1, -- NULL, NULL); -- } -- return(0); --} -- --__initcall(setup_iomem); -- --#define PFN_UP(x) (((x) + PAGE_SIZE-1) >> PAGE_SHIFT) --#define PFN_DOWN(x) ((x) >> PAGE_SHIFT) -- --/* Changed during early boot */ --static struct mem_region physmem_region; --static struct vm_reserved physmem_reserved; -- --void setup_physmem(unsigned long start, unsigned long reserve_end, -- unsigned long len) --{ -- struct mem_region *region = &physmem_region; -- struct vm_reserved *reserved = &physmem_reserved; -- unsigned long cur, pfn = 0; -- int do_free = 1, bootmap_size; -- -- do { -- cur = min(len, (unsigned long) REGION_SIZE); -- if(region == NULL) -- region = alloc_bootmem_low_pages(sizeof(*region)); -- if(reserved == NULL) -- reserved = alloc_bootmem_low_pages(sizeof(*reserved)); -- if((region == NULL) || (reserved == NULL)) -- panic("Couldn't allocate physmem region or vm " -- "reservation\n"); -- setup_range(-1, NULL, start, pfn, cur, 1, region, reserved); -- -- if(do_free){ -- unsigned long reserve = reserve_end - start; -- int pfn = PFN_UP(__pa(reserve_end)); -- int delta = (len - reserve) >> PAGE_SHIFT; -- -- bootmap_size = init_bootmem(pfn, pfn + delta); -- free_bootmem(__pa(reserve_end) + bootmap_size, -- cur - bootmap_size - reserve); -- do_free = 0; -- } -- start += cur; -- pfn += cur >> PAGE_SHIFT; -- len -= cur; -- region = NULL; -- reserved = NULL; -- } while(len > 
0); --} -- --struct mem_region *phys_region(unsigned long phys) --{ -- unsigned int n = phys_region_index(phys); -- -- if(regions[n] == NULL) -- panic("Physical address in uninitialized region"); -- return(regions[n]); --} -- --unsigned long phys_offset(unsigned long phys) --{ -- return(phys_addr(phys)); --} -- --struct page *phys_mem_map(unsigned long phys) --{ -- return((struct page *) phys_region(phys)->mem_map); --} -- --struct page *pte_mem_map(pte_t pte) --{ -- return(phys_mem_map(pte_val(pte))); --} -- --struct mem_region *page_region(struct page *page, int *index_out) --{ -- int i; -- struct mem_region *region; -- struct page *map; -- -- for(i = 0; i < NREGIONS; i++){ -- region = regions[i]; -- if(region == NULL) continue; -- map = region->mem_map; -- if((page >= map) && (page < &map[region->len >> PAGE_SHIFT])){ -- if(index_out != NULL) *index_out = i; -- return(region); -- } -- } -- panic("No region found for page"); -- return(NULL); --} -- --unsigned long page_to_pfn(struct page *page) --{ -- struct mem_region *region = page_region(page, NULL); -- -- return(region->start_pfn + (page - (struct page *) region->mem_map)); --} -- --struct mem_region *pfn_to_region(unsigned long pfn, int *index_out) --{ -- struct mem_region *region; -- int i; -- -- for(i = 0; i < NREGIONS; i++){ -- region = regions[i]; -- if(region == NULL) -- continue; -- -- if((region->start_pfn <= pfn) && -- (region->start_pfn + (region->len >> PAGE_SHIFT) > pfn)){ -- if(index_out != NULL) -- *index_out = i; -- return(region); -- } -- } -- return(NULL); --} -- --struct page *pfn_to_page(unsigned long pfn) --{ -- struct mem_region *region = pfn_to_region(pfn, NULL); -- struct page *mem_map = (struct page *) region->mem_map; -- -- return(&mem_map[pfn - region->start_pfn]); --} -- --unsigned long phys_to_pfn(unsigned long p) --{ -- struct mem_region *region = regions[phys_region_index(p)]; -- -- return(region->start_pfn + (phys_addr(p) >> PAGE_SHIFT)); --} -- --unsigned long 
pfn_to_phys(unsigned long pfn) --{ -- int n; -- struct mem_region *region = pfn_to_region(pfn, &n); -- -- return(mk_phys((pfn - region->start_pfn) << PAGE_SHIFT, n)); --} -- --struct page *page_mem_map(struct page *page) --{ -- return((struct page *) page_region(page, NULL)->mem_map); --} -- --extern unsigned long region_pa(void *virt) --{ -- struct mem_region *region; -- unsigned long addr = (unsigned long) virt; -- int i; -- -- for(i = 0; i < NREGIONS; i++){ -- region = regions[i]; -- if(region == NULL) continue; -- if((region->start <= addr) && -- (addr <= region->start + region->len)) -- return(mk_phys(addr - region->start, i)); -- } -- panic("region_pa : no region for virtual address"); -- return(0); --} -- --extern void *region_va(unsigned long phys) --{ -- return((void *) (phys_region(phys)->start + phys_addr(phys))); --} -- --unsigned long page_to_phys(struct page *page) --{ -- int n; -- struct mem_region *region = page_region(page, &n); -- struct page *map = region->mem_map; -- return(mk_phys((page - map) << PAGE_SHIFT, n)); --} -- --struct page *phys_to_page(unsigned long phys) --{ -- struct page *mem_map; -- -- mem_map = phys_mem_map(phys); -- return(mem_map + (phys_offset(phys) >> PAGE_SHIFT)); --} -- --static int setup_mem_maps(void) --{ -- struct mem_region *region; -- int i; -- -- for(i = 0; i < NREGIONS; i++){ -- region = regions[i]; -- if((region != NULL) && (region->fd > 0)) init_maps(region); -- } -- return(0); --} -- --__initcall(setup_mem_maps); -- - /* - * Allocate and free page tables. 
- */ -Index: uml-2.6.7/mm/page_alloc.c -=================================================================== ---- uml-2.6.7.orig/mm/page_alloc.c 2004-07-16 19:35:58.508373224 +0300 -+++ uml-2.6.7/mm/page_alloc.c 2004-07-16 19:47:23.806192120 +0300 -@@ -279,6 +279,8 @@ - LIST_HEAD(list); - int i; - -+ arch_free_page(page, order); -+ - mod_page_state(pgfree, 1 << order); - for (i = 0 ; i < (1 << order) ; ++i) - free_pages_check(__FUNCTION__, page + i); -@@ -497,6 +499,8 @@ - struct per_cpu_pages *pcp; - unsigned long flags; - -+ arch_free_page(page, 0); -+ - kernel_map_pages(page, 1, 0); - inc_page_state(pgfree); - free_pages_check(__FUNCTION__, page); -Index: uml-2.6.7/arch/um/kernel/skas/uaccess.c -=================================================================== ---- uml-2.6.7.orig/arch/um/kernel/skas/uaccess.c 2004-07-16 19:47:23.621220240 +0300 -+++ uml-2.6.7/arch/um/kernel/skas/uaccess.c 2004-07-16 19:47:23.735202912 +0300 -@@ -0,0 +1,219 @@ -+/* -+ * Copyright (C) 2002 - 2003 Jeff Dike (jdike@addtoit.com) -+ * Licensed under the GPL -+ */ -+ -+#include "linux/stddef.h" -+#include "linux/kernel.h" -+#include "linux/string.h" -+#include "linux/fs.h" -+#include "linux/highmem.h" -+#include "asm/page.h" -+#include "asm/pgtable.h" -+#include "asm/uaccess.h" -+#include "kern_util.h" -+ -+extern void *um_virt_to_phys(struct task_struct *task, unsigned long addr, -+ pte_t *pte_out); -+ -+static unsigned long maybe_map(unsigned long virt, int is_write) -+{ -+ pte_t pte; -+ int err; -+ -+ void *phys = um_virt_to_phys(current, virt, &pte); -+ int dummy_code; -+ -+ if(IS_ERR(phys) || (is_write && !pte_write(pte))){ -+ err = handle_page_fault(virt, 0, is_write, 0, &dummy_code); -+ if(err) -+ return(0); -+ phys = um_virt_to_phys(current, virt, NULL); -+ } -+ return((unsigned long) phys); -+} -+ -+static int do_op(unsigned long addr, int len, int is_write, -+ int (*op)(unsigned long addr, int len, void *arg), void *arg) -+{ -+ struct page *page; -+ int n; -+ -+ addr = 
maybe_map(addr, is_write); -+ if(addr == -1) -+ return(-1); -+ -+ page = phys_to_page(addr); -+ addr = (unsigned long) kmap(page) + (addr & ~PAGE_MASK); -+ n = (*op)(addr, len, arg); -+ kunmap(page); -+ -+ return(n); -+} -+ -+static int buffer_op(unsigned long addr, int len, int is_write, -+ int (*op)(unsigned long addr, int len, void *arg), -+ void *arg) -+{ -+ int size = min(PAGE_ALIGN(addr) - addr, (unsigned long) len); -+ int remain = len, n; -+ -+ n = do_op(addr, size, is_write, op, arg); -+ if(n != 0) -+ return(n < 0 ? remain : 0); -+ -+ addr += size; -+ remain -= size; -+ if(remain == 0) -+ return(0); -+ -+ while(addr < ((addr + remain) & PAGE_MASK)){ -+ n = do_op(addr, PAGE_SIZE, is_write, op, arg); -+ if(n != 0) -+ return(n < 0 ? remain : 0); -+ -+ addr += PAGE_SIZE; -+ remain -= PAGE_SIZE; -+ } -+ if(remain == 0) -+ return(0); -+ -+ n = do_op(addr, remain, is_write, op, arg); -+ if(n != 0) -+ return(n < 0 ? remain : 0); -+ return(0); -+} -+ -+static int copy_chunk_from_user(unsigned long from, int len, void *arg) -+{ -+ unsigned long *to_ptr = arg, to = *to_ptr; -+ -+ memcpy((void *) to, (void *) from, len); -+ *to_ptr += len; -+ return(0); -+} -+ -+int copy_from_user_skas(void *to, const void *from, int n) -+{ -+ if(segment_eq(get_fs(), KERNEL_DS)){ -+ memcpy(to, from, n); -+ return(0); -+ } -+ -+ return(access_ok_skas(VERIFY_READ, from, n) ? -+ buffer_op((unsigned long) from, n, 0, copy_chunk_from_user, &to): -+ n); -+} -+ -+static int copy_chunk_to_user(unsigned long to, int len, void *arg) -+{ -+ unsigned long *from_ptr = arg, from = *from_ptr; -+ -+ memcpy((void *) to, (void *) from, len); -+ *from_ptr += len; -+ return(0); -+} -+ -+int copy_to_user_skas(void *to, const void *from, int n) -+{ -+ if(segment_eq(get_fs(), KERNEL_DS)){ -+ memcpy(to, from, n); -+ return(0); -+ } -+ -+ return(access_ok_skas(VERIFY_WRITE, to, n) ? 
-+ buffer_op((unsigned long) to, n, 1, copy_chunk_to_user, &from) : -+ n); -+} -+ -+static int strncpy_chunk_from_user(unsigned long from, int len, void *arg) -+{ -+ char **to_ptr = arg, *to = *to_ptr; -+ int n; -+ -+ strncpy(to, (void *) from, len); -+ n = strnlen(to, len); -+ *to_ptr += n; -+ -+ if(n < len) -+ return(1); -+ return(0); -+} -+ -+int strncpy_from_user_skas(char *dst, const char *src, int count) -+{ -+ int n; -+ char *ptr = dst; -+ -+ if(segment_eq(get_fs(), KERNEL_DS)){ -+ strncpy(dst, src, count); -+ return(strnlen(dst, count)); -+ } -+ -+ if(!access_ok_skas(VERIFY_READ, src, 1)) -+ return(-EFAULT); -+ -+ n = buffer_op((unsigned long) src, count, 0, strncpy_chunk_from_user, -+ &ptr); -+ if(n != 0) -+ return(-EFAULT); -+ return(strnlen(dst, count)); -+} -+ -+static int clear_chunk(unsigned long addr, int len, void *unused) -+{ -+ memset((void *) addr, 0, len); -+ return(0); -+} -+ -+int __clear_user_skas(void *mem, int len) -+{ -+ return(buffer_op((unsigned long) mem, len, 1, clear_chunk, NULL)); -+} -+ -+int clear_user_skas(void *mem, int len) -+{ -+ if(segment_eq(get_fs(), KERNEL_DS)){ -+ memset(mem, 0, len); -+ return(0); -+ } -+ -+ return(access_ok_skas(VERIFY_WRITE, mem, len) ? -+ buffer_op((unsigned long) mem, len, 1, clear_chunk, NULL) : len); -+} -+ -+static int strnlen_chunk(unsigned long str, int len, void *arg) -+{ -+ int *len_ptr = arg, n; -+ -+ n = strnlen((void *) str, len); -+ *len_ptr += n; -+ -+ if(n < len) -+ return(1); -+ return(0); -+} -+ -+int strnlen_user_skas(const void *str, int len) -+{ -+ int count = 0, n; -+ -+ if(segment_eq(get_fs(), KERNEL_DS)) -+ return(strnlen(str, len) + 1); -+ -+ n = buffer_op((unsigned long) str, len, 0, strnlen_chunk, &count); -+ if(n == 0) -+ return(count + 1); -+ return(-EFAULT); -+} -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. 
This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -Index: uml-2.6.7/arch/um/Makefile -=================================================================== ---- uml-2.6.7.orig/arch/um/Makefile 2004-07-16 19:36:57.056472560 +0300 -+++ uml-2.6.7/arch/um/Makefile 2004-07-16 19:47:24.132142568 +0300 -@@ -22,17 +22,21 @@ - $(ARCH_DIR)/sys-$(SUBARCH)/ - - # Have to precede the include because the included Makefiles reference them. --SYMLINK_HEADERS = include/asm-um/archparam.h include/asm-um/system.h \ -- include/asm-um/sigcontext.h include/asm-um/processor.h \ -- include/asm-um/ptrace.h include/asm-um/arch-signal.h -+SYMLINK_HEADERS = archparam.h system.h sigcontext.h processor.h ptrace.h \ -+ arch-signal.h module.h -+SYMLINK_HEADERS := $(foreach header,$(SYMLINK_HEADERS),include/asm-um/$(header)) - - ARCH_SYMLINKS = include/asm-um/arch $(ARCH_DIR)/include/sysdep $(ARCH_DIR)/os \ - $(SYMLINK_HEADERS) $(ARCH_DIR)/include/uml-config.h - - GEN_HEADERS += $(ARCH_DIR)/include/task.h $(ARCH_DIR)/include/kern_constants.h - --include $(ARCH_DIR)/Makefile-$(SUBARCH) --include $(ARCH_DIR)/Makefile-os-$(OS) -+# This target adds dependencies to "prepare". They are defined in the included -+# Makefiles (see Makefile-i386). 
-+ -+.PHONY: sys_prepare -+sys_prepare: -+ @: - - MAKEFILE-$(CONFIG_MODE_TT) += Makefile-tt - MAKEFILE-$(CONFIG_MODE_SKAS) += Makefile-skas -@@ -41,6 +45,9 @@ - include $(addprefix $(ARCH_DIR)/,$(MAKEFILE-y)) - endif - -+include $(ARCH_DIR)/Makefile-$(SUBARCH) -+include $(ARCH_DIR)/Makefile-os-$(OS) -+ - EXTRAVERSION := $(EXTRAVERSION)-1um - - ARCH_INCLUDE = -I$(ARCH_DIR)/include -@@ -52,14 +59,22 @@ - - CFLAGS += $(CFLAGS-y) -D__arch_um__ -DSUBARCH=\"$(SUBARCH)\" \ - -D_LARGEFILE64_SOURCE $(ARCH_INCLUDE) -Derrno=kernel_errno \ -- $(MODE_INCLUDE) -+ -Dsigprocmask=kernel_sigprocmask $(MODE_INCLUDE) -+ -+CFLAGS += $(call check_gcc,-fno-unit-at-a-time,) - - LINK_WRAPS = -Wl,--wrap,malloc -Wl,--wrap,free -Wl,--wrap,calloc - -+# These are needed for clean and mrproper, since in that case .config is not -+# included; the values here are meaningless -+ -+CONFIG_NEST_LEVEL ?= 0 -+CONFIG_KERNEL_HALF_GIGS ?= 0 -+ - SIZE = (($(CONFIG_NEST_LEVEL) + $(CONFIG_KERNEL_HALF_GIGS)) * 0x20000000) - - ifeq ($(CONFIG_MODE_SKAS), y) --$(SYS_HEADERS) : $(ARCH_DIR)/kernel/skas/include/skas_ptregs.h -+$(SYS_HEADERS) : $(TOPDIR)/$(ARCH_DIR)/include/skas_ptregs.h - endif - - include/linux/version.h: arch/$(ARCH)/Makefile -@@ -98,17 +113,17 @@ - CONFIG_KERNEL_STACK_ORDER ?= 2 - STACK_SIZE := $(shell echo $$[ 4096 * (1 << $(CONFIG_KERNEL_STACK_ORDER)) ] ) - --AFLAGS_vmlinux.lds.o = -U$(SUBARCH) \ -+AFLAGS_vmlinux.lds.o = $(shell echo -U$(SUBARCH) \ - -DSTART=$$(($(TOP_ADDR) - $(SIZE))) -DELF_ARCH=$(ELF_ARCH) \ - -DELF_FORMAT=\"$(ELF_FORMAT)\" $(CPP_MODE_TT) \ -- -DKERNEL_STACK_SIZE=$(STACK_SIZE) -+ -DKERNEL_STACK_SIZE=$(STACK_SIZE)) - --AFLAGS_$(LD_SCRIPT-y:.s=).o = $(AFLAGS_vmlinux.lds.o) -P -C -Uum -+export AFLAGS_$(LD_SCRIPT-y:.s=).o = $(AFLAGS_vmlinux.lds.o) -P -C -Uum - - LD_SCRIPT-y := $(ARCH_DIR)/$(LD_SCRIPT-y) - --$(LD_SCRIPT-y) : $(LD_SCRIPT-y:.s=.S) scripts FORCE -- $(call if_changed_dep,as_s_S) -+#$(LD_SCRIPT-y) : $(LD_SCRIPT-y:.s=.S) scripts FORCE -+# $(call if_changed_dep,as_s_S) 
- - linux: vmlinux $(LD_SCRIPT-y) - $(CC) -Wl,-T,$(LD_SCRIPT-y) $(LINK-y) $(LINK_WRAPS) \ -@@ -116,37 +131,47 @@ - - USER_CFLAGS := $(patsubst -I%,,$(CFLAGS)) - USER_CFLAGS := $(patsubst -Derrno=kernel_errno,,$(USER_CFLAGS)) -+USER_CFLAGS := $(patsubst -Dsigprocmask=kernel_sigprocmask,,$(USER_CFLAGS)) - USER_CFLAGS := $(patsubst -D__KERNEL__,,$(USER_CFLAGS)) $(ARCH_INCLUDE) \ - $(MODE_INCLUDE) - - # To get a definition of F_SETSIG - USER_CFLAGS += -D_GNU_SOURCE - -+# From main Makefile, these options are set after including the ARCH makefile. -+# So copy them here. -+ -+ifdef CONFIG_CC_OPTIMIZE_FOR_SIZE -+USER_CFLAGS += -Os -+else -+USER_CFLAGS += -O2 -+endif -+ -+ifndef CONFIG_FRAME_POINTER -+USER_CFLAGS += -fomit-frame-pointer -+endif -+ -+ifdef CONFIG_DEBUG_INFO -+USER_CFLAGS += -g -+endif -+ - CLEAN_FILES += linux x.i gmon.out $(ARCH_DIR)/uml.lds.s \ -- $(ARCH_DIR)/dyn_link.ld.s $(GEN_HEADERS) -+ $(ARCH_DIR)/dyn_link.ld.s $(ARCH_DIR)/include/uml-config.h \ -+ $(GEN_HEADERS) -+ -+MRPROPER_FILES += $(SYMLINK_HEADERS) $(ARCH_SYMLINKS) \ -+ $(addprefix $(ARCH_DIR)/kernel/,$(KERN_SYMLINKS)) - --$(ARCH_DIR)/main.o: $(ARCH_DIR)/main.c -- $(CC) $(USER_CFLAGS) $(EXTRA_CFLAGS) -c -o $@ $< -+$(ARCH_DIR)/main.o: $(ARCH_DIR)/main.c sys_prepare -+ @ echo ' MAIN $@' -+ @ $(CC) $(USER_CFLAGS) $(EXTRA_CFLAGS) -c -o $@ $< - - archmrproper: -- for d in $(ARCH_SUBDIRS) $(ARCH_DIR)/util; \ -- do \ -- $(MAKE) -C $$d archmrproper; \ -- done -- rm -f $(CLEAN_FILES) $(SYMLINK_HEADERS) $(ARCH_SYMLINKS) include/asm \ -- $(addprefix $(ARCH_DIR)/kernel/,$(KERN_SYMLINKS)) -- --archclean: sysclean -- for d in $(ARCH_SUBDIRS) $(ARCH_DIR)/util; \ -- do \ -- $(MAKE) -C $$d clean; \ -- done -- find . \( -name '*.bb' -o -name '*.bbg' -o -name '*.da' \ -- -o -name '*.gcov' \) -type f -print | xargs rm -f -- rm -f linux x.i gmon.out $(ARCH_DIR)/link.ld $(GEN_HEADERS) -+ @: - --archdep: -- for d in $(ARCH_SUBDIRS); do $(MAKE) -C $$d fastdep; done -+archclean: -+ @find . 
\( -name '*.bb' -o -name '*.bbg' -o -name '*.da' \ -+ -o -name '*.gcov' \) -type f -print | xargs rm -f - - $(SYMLINK_HEADERS): - cd $(TOPDIR)/$(dir $@) ; \ -@@ -161,19 +186,32 @@ - $(ARCH_DIR)/os: - cd $(ARCH_DIR) && ln -sf os-$(OS) os - --$(ARCH_DIR)/include/uml-config.h : -- sed 's/ CONFIG/ UML_CONFIG/' $(TOPDIR)/include/linux/autoconf.h > $@ -+# Generated files -+define filechk_umlconfig -+ sed 's/ CONFIG/ UML_CONFIG/' -+endef -+ -+$(ARCH_DIR)/include/uml-config.h : $(TOPDIR)/include/linux/autoconf.h -+ $(call filechk,umlconfig) -+ -+filechk_gen_header = $< - - $(ARCH_DIR)/include/task.h : $(ARCH_DIR)/util/mk_task -- $< > $@ -+ $(call filechk,gen_header) - - $(ARCH_DIR)/include/kern_constants.h : $(ARCH_DIR)/util/mk_constants -- $< > $@ -+ $(call filechk,gen_header) - --$(ARCH_DIR)/util/mk_task : $(ARCH_DIR)/kernel/skas/include/skas_ptregs.h \ -- $(ARCH_DIR)/util FORCE ; -+$(ARCH_DIR)/util/mk_task $(ARCH_DIR)/util/mk_constants : $(ARCH_DIR)/util \ -+ sys_prepare FORCE ; - - $(ARCH_DIR)/util: FORCE -- @$(call descend,$@,) -+ $(Q)$(MAKE) $(build)=$@ -+ -+export SUBARCH USER_CFLAGS OS -+ -+all: linux - --export SUBARCH USER_CFLAGS OS -+define archhelp -+ echo '* linux - Binary kernel image (./linux)' -+endef -Index: uml-2.6.7/arch/um/kernel/tt/unmap.c -=================================================================== ---- uml-2.6.7.orig/arch/um/kernel/tt/unmap.c 2004-07-16 19:37:13.584959848 +0300 -+++ uml-2.6.7/arch/um/kernel/tt/unmap.c 2004-07-16 19:47:23.753200176 +0300 -@@ -3,10 +3,7 @@ - * Licensed under the GPL - */ - --#include --#include - #include --#include "user.h" - - int switcheroo(int fd, int prot, void *from, void *to, int size) - { -Index: uml-2.6.7/arch/um/kernel/tt/ptproxy/Makefile -=================================================================== ---- uml-2.6.7.orig/arch/um/kernel/tt/ptproxy/Makefile 2004-07-16 19:36:30.113568504 +0300 -+++ uml-2.6.7/arch/um/kernel/tt/ptproxy/Makefile 2004-07-16 19:47:23.746201240 +0300 -@@ -9,5 +9,3 @@ - - 
$(USER_OBJS) : %.o: %.c - $(CC) $(CFLAGS_$(notdir $@)) $(USER_CFLAGS) -c -o $@ $< -- --clean: -Index: uml-2.6.7/arch/um/kernel/mem_user.c -=================================================================== ---- uml-2.6.7.orig/arch/um/kernel/mem_user.c 2004-07-16 19:36:47.959855456 +0300 -+++ uml-2.6.7/arch/um/kernel/mem_user.c 2004-07-16 19:47:23.721205040 +0300 -@@ -34,10 +34,9 @@ - #include - #include - #include --#include - #include - #include --#include -+#include - #include - #include - #include "kern_util.h" -@@ -47,105 +46,145 @@ - #include "init.h" - #include "os.h" - #include "tempfile.h" -+#include "kern_constants.h" - - extern struct mem_region physmem_region; - - #define TEMPNAME_TEMPLATE "vm_file-XXXXXX" - --int create_mem_file(unsigned long len) -+static int create_tmp_file(unsigned long len) - { -- int fd; -+ int fd, err; - char zero; - - fd = make_tempfile(TEMPNAME_TEMPLATE, NULL, 1); -- if (fchmod(fd, 0777) < 0){ -- perror("fchmod"); -+ if(fd < 0) { -+ os_print_error(fd, "make_tempfile"); -+ exit(1); -+ } -+ -+ err = os_mode_fd(fd, 0777); -+ if(err < 0){ -+ os_print_error(err, "os_mode_fd"); - exit(1); - } -- if(os_seek_file(fd, len) < 0){ -- perror("lseek"); -+ err = os_seek_file(fd, len); -+ if(err < 0){ -+ os_print_error(err, "os_seek_file"); - exit(1); - } - zero = 0; -- if(write(fd, &zero, 1) != 1){ -- perror("write"); -+ err = os_write_file(fd, &zero, 1); -+ if(err != 1){ -+ os_print_error(err, "os_write_file"); - exit(1); - } -- if(fcntl(fd, F_SETFD, 1) != 0) -- perror("Setting FD_CLOEXEC failed"); -+ - return(fd); - } - --int setup_region(struct mem_region *region, void *entry) -+static int have_devanon = 0; -+ -+void check_devanon(void) -+{ -+ int fd; -+ -+ printk("Checking for /dev/anon on the host..."); -+ fd = open("/dev/anon", O_RDWR); -+ if(fd < 0){ -+ printk("Not available (open failed with errno %d)\n", errno); -+ return; -+ } -+ -+ printk("OK\n"); -+ have_devanon = 1; -+} -+ -+static int create_anon_file(unsigned long len) - { -- 
void *loc, *start; -- char *driver; -- int err, offset; -- -- if(region->start != -1){ -- err = reserve_vm(region->start, -- region->start + region->len, entry); -- if(err){ -- printk("setup_region : failed to reserve " -- "0x%x - 0x%x for driver '%s'\n", -- region->start, -- region->start + region->len, -- region->driver); -- return(-1); -- } -- } -- else region->start = get_vm(region->len); -- if(region->start == 0){ -- if(region->driver == NULL) driver = "physmem"; -- else driver = region->driver; -- printk("setup_region : failed to find vm for " -- "driver '%s' (length %d)\n", driver, region->len); -- return(-1); -- } -- if(region->start == uml_physmem){ -- start = (void *) uml_reserved; -- offset = uml_reserved - uml_physmem; -- } -- else { -- start = (void *) region->start; -- offset = 0; -- } -- -- loc = mmap(start, region->len - offset, PROT_READ | PROT_WRITE, -- MAP_SHARED | MAP_FIXED, region->fd, offset); -- if(loc != start){ -- perror("Mapping memory"); -+ void *addr; -+ int fd; -+ -+ fd = open("/dev/anon", O_RDWR); -+ if(fd < 0) { -+ os_print_error(fd, "opening /dev/anon"); - exit(1); - } -- return(0); -+ -+ addr = mmap(NULL, len, PROT_READ | PROT_WRITE , MAP_PRIVATE, fd, 0); -+ if(addr == MAP_FAILED){ -+ os_print_error((int) addr, "mapping physmem file"); -+ exit(1); -+ } -+ munmap(addr, len); -+ -+ return(fd); -+} -+ -+int create_mem_file(unsigned long len) -+{ -+ int err, fd; -+ -+ if(have_devanon) -+ fd = create_anon_file(len); -+ else fd = create_tmp_file(len); -+ -+ err = os_set_exec_close(fd, 1); -+ if(err < 0) -+ os_print_error(err, "exec_close"); -+ return(fd); - } - -+struct iomem_region *iomem_regions = NULL; -+int iomem_size = 0; -+ - static int __init parse_iomem(char *str, int *add) - { -- struct stat buf; -+ struct iomem_region *new; -+ struct uml_stat buf; - char *file, *driver; -- int fd; -+ int fd, err; - - driver = str; - file = strchr(str,','); - if(file == NULL){ -- printk("parse_iomem : failed to parse iomem\n"); -- return(1); -+ 
printf("parse_iomem : failed to parse iomem\n"); -+ goto out; - } - *file = '\0'; - file++; - fd = os_open_file(file, of_rdwr(OPENFLAGS()), 0); - if(fd < 0){ -- printk("parse_iomem - Couldn't open io file, errno = %d\n", -- errno); -- return(1); -- } -- if(fstat(fd, &buf) < 0) { -- printk("parse_iomem - cannot fstat file, errno = %d\n", errno); -- return(1); -+ os_print_error(fd, "parse_iomem - Couldn't open io file"); -+ goto out; - } -- add_iomem(driver, fd, buf.st_size); -+ -+ err = os_stat_fd(fd, &buf); -+ if(err < 0){ -+ os_print_error(err, "parse_iomem - cannot stat_fd file"); -+ goto out_close; -+ } -+ -+ new = malloc(sizeof(*new)); -+ if(new == NULL){ -+ perror("Couldn't allocate iomem_region struct"); -+ goto out_close; -+ } -+ -+ *new = ((struct iomem_region) { .next = iomem_regions, -+ .driver = driver, -+ .fd = fd, -+ .size = buf.ust_size, -+ .phys = 0, -+ .virt = 0 }); -+ iomem_regions = new; -+ iomem_size += new->size + UM_KERN_PAGE_SIZE; -+ - return(0); -+ out_close: -+ os_close_file(fd); -+ out: -+ return(1); - } - - __uml_setup("iomem=", parse_iomem, -@@ -153,73 +192,20 @@ - " Configure as an IO memory region named .\n\n" - ); - --#ifdef notdef --int logging = 0; --int logging_fd = -1; -- --int logging_line = 0; --char logging_buf[256]; -- --void log(char *fmt, ...) 
--{ -- va_list ap; -- struct timeval tv; -- struct openflags flags; -- -- if(logging == 0) return; -- if(logging_fd < 0){ -- flags = of_create(of_trunc(of_rdrw(OPENFLAGS()))); -- logging_fd = os_open_file("log", flags, 0644); -- } -- gettimeofday(&tv, NULL); -- sprintf(logging_buf, "%d\t %u.%u ", logging_line++, tv.tv_sec, -- tv.tv_usec); -- va_start(ap, fmt); -- vsprintf(&logging_buf[strlen(logging_buf)], fmt, ap); -- va_end(ap); -- write(logging_fd, logging_buf, strlen(logging_buf)); --} --#endif -- --int map_memory(unsigned long virt, unsigned long phys, unsigned long len, -- int r, int w, int x) --{ -- struct mem_region *region = phys_region(phys); -- -- return(os_map_memory((void *) virt, region->fd, phys_offset(phys), len, -- r, w, x)); --} -- - int protect_memory(unsigned long addr, unsigned long len, int r, int w, int x, - int must_succeed) - { -- if(os_protect_memory((void *) addr, len, r, w, x) < 0){ -+ int err; -+ -+ err = os_protect_memory((void *) addr, len, r, w, x); -+ if(err < 0){ - if(must_succeed) -- panic("protect failed, errno = %d", errno); -- else return(-errno); -+ panic("protect failed, err = %d", -err); -+ else return(err); - } - return(0); - } - --unsigned long find_iomem(char *driver, unsigned long *len_out) --{ -- struct mem_region *region; -- int i, n; -- -- n = nregions(); -- for(i = 0; i < n; i++){ -- region = regions[i]; -- if(region == NULL) continue; -- if((region->driver != NULL) && -- !strcmp(region->driver, driver)){ -- *len_out = region->len; -- return(region->start); -- } -- } -- *len_out = 0; -- return 0; --} -- - /* - * Overrides for Emacs so that we follow Linus's tabbing style. 
- * Emacs will notice this stuff at the end of the file and automatically -Index: uml-2.6.7/arch/um/kernel/init_task.c -=================================================================== ---- uml-2.6.7.orig/arch/um/kernel/init_task.c 2004-07-16 19:37:49.384517488 +0300 -+++ uml-2.6.7/arch/um/kernel/init_task.c 2004-07-16 19:47:23.716205800 +0300 -@@ -8,7 +8,6 @@ - #include "linux/module.h" - #include "linux/sched.h" - #include "linux/init_task.h" --#include "linux/version.h" - #include "asm/uaccess.h" - #include "asm/pgtable.h" - #include "user_util.h" -@@ -18,7 +17,7 @@ - struct mm_struct init_mm = INIT_MM(init_mm); - static struct files_struct init_files = INIT_FILES; - static struct signal_struct init_signals = INIT_SIGNALS(init_signals); -- -+static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand); - EXPORT_SYMBOL(init_mm); - - /* -@@ -43,26 +42,12 @@ - __attribute__((__section__(".data.init_task"))) = - { INIT_THREAD_INFO(init_task) }; - --struct task_struct *alloc_task_struct(void) --{ -- return((struct task_struct *) -- __get_free_pages(GFP_KERNEL, CONFIG_KERNEL_STACK_ORDER)); --} -- - void unprotect_stack(unsigned long stack) - { - protect_memory(stack, (1 << CONFIG_KERNEL_STACK_ORDER) * PAGE_SIZE, - 1, 1, 0, 1); - } - --void free_task_struct(struct task_struct *task) --{ -- /* free_pages decrements the page counter and only actually frees -- * the pages if they are now not accessed by anything. -- */ -- free_pages((unsigned long) task, CONFIG_KERNEL_STACK_ORDER); --} -- - /* - * Overrides for Emacs so that we follow Linus's tabbing style. 
- * Emacs will notice this stuff at the end of the file and automatically -Index: uml-2.6.7/arch/um/kernel/tty_log.c -=================================================================== ---- uml-2.6.7.orig/arch/um/kernel/tty_log.c 2004-07-16 19:36:56.920493232 +0300 -+++ uml-2.6.7/arch/um/kernel/tty_log.c 2004-07-16 19:47:23.753200176 +0300 -@@ -9,10 +9,10 @@ - #include - #include - #include --#include - #include - #include "init.h" - #include "user.h" -+#include "kern_util.h" - #include "os.h" - - #define TTY_LOG_DIR "./" -@@ -24,29 +24,40 @@ - #define TTY_LOG_OPEN 1 - #define TTY_LOG_CLOSE 2 - #define TTY_LOG_WRITE 3 -+#define TTY_LOG_EXEC 4 -+ -+#define TTY_READ 1 -+#define TTY_WRITE 2 - - struct tty_log_buf { - int what; - unsigned long tty; - int len; -+ int direction; -+ unsigned long sec; -+ unsigned long usec; - }; - --int open_tty_log(void *tty) -+int open_tty_log(void *tty, void *current_tty) - { - struct timeval tv; - struct tty_log_buf data; - char buf[strlen(tty_log_dir) + sizeof("01234567890-01234567\0")]; - int fd; - -+ gettimeofday(&tv, NULL); - if(tty_log_fd != -1){ -- data = ((struct tty_log_buf) { what : TTY_LOG_OPEN, -- tty : (unsigned long) tty, -- len : 0 }); -- write(tty_log_fd, &data, sizeof(data)); -+ data = ((struct tty_log_buf) { .what = TTY_LOG_OPEN, -+ .tty = (unsigned long) tty, -+ .len = sizeof(current_tty), -+ .direction = 0, -+ .sec = tv.tv_sec, -+ .usec = tv.tv_usec } ); -+ os_write_file(tty_log_fd, &data, sizeof(data)); -+ os_write_file(tty_log_fd, &current_tty, data.len); - return(tty_log_fd); - } - -- gettimeofday(&tv, NULL); - sprintf(buf, "%s/%0u-%0u", tty_log_dir, (unsigned int) tv.tv_sec, - (unsigned int) tv.tv_usec); - -@@ -62,30 +73,117 @@ - void close_tty_log(int fd, void *tty) - { - struct tty_log_buf data; -+ struct timeval tv; - - if(tty_log_fd != -1){ -- data = ((struct tty_log_buf) { what : TTY_LOG_CLOSE, -- tty : (unsigned long) tty, -- len : 0 }); -- write(tty_log_fd, &data, sizeof(data)); -+ gettimeofday(&tv, NULL); -+ 
data = ((struct tty_log_buf) { .what = TTY_LOG_CLOSE, -+ .tty = (unsigned long) tty, -+ .len = 0, -+ .direction = 0, -+ .sec = tv.tv_sec, -+ .usec = tv.tv_usec } ); -+ os_write_file(tty_log_fd, &data, sizeof(data)); - return; - } -- close(fd); -+ os_close_file(fd); - } - --int write_tty_log(int fd, char *buf, int len, void *tty) -+static int log_chunk(int fd, const char *buf, int len) - { -+ int total = 0, try, missed, n; -+ char chunk[64]; -+ -+ while(len > 0){ -+ try = (len > sizeof(chunk)) ? sizeof(chunk) : len; -+ missed = copy_from_user_proc(chunk, (char *) buf, try); -+ try -= missed; -+ n = os_write_file(fd, chunk, try); -+ if(n != try) { -+ if(n < 0) -+ return(n); -+ return(-EIO); -+ } -+ if(missed != 0) -+ return(-EFAULT); -+ -+ len -= try; -+ total += try; -+ buf += try; -+ } -+ -+ return(total); -+} -+ -+int write_tty_log(int fd, const char *buf, int len, void *tty, int is_read) -+{ -+ struct timeval tv; - struct tty_log_buf data; -+ int direction; - - if(fd == tty_log_fd){ -- data = ((struct tty_log_buf) { what : TTY_LOG_WRITE, -- tty : (unsigned long) tty, -- len : len }); -- write(tty_log_fd, &data, sizeof(data)); -+ gettimeofday(&tv, NULL); -+ direction = is_read ? 
TTY_READ : TTY_WRITE; -+ data = ((struct tty_log_buf) { .what = TTY_LOG_WRITE, -+ .tty = (unsigned long) tty, -+ .len = len, -+ .direction = direction, -+ .sec = tv.tv_sec, -+ .usec = tv.tv_usec } ); -+ os_write_file(tty_log_fd, &data, sizeof(data)); - } -- return(write(fd, buf, len)); -+ -+ return(log_chunk(fd, buf, len)); - } - -+void log_exec(char **argv, void *tty) -+{ -+ struct timeval tv; -+ struct tty_log_buf data; -+ char **ptr,*arg; -+ int len; -+ -+ if(tty_log_fd == -1) return; -+ -+ gettimeofday(&tv, NULL); -+ -+ len = 0; -+ for(ptr = argv; ; ptr++){ -+ if(copy_from_user_proc(&arg, ptr, sizeof(arg))) -+ return; -+ if(arg == NULL) break; -+ len += strlen_user_proc(arg); -+ } -+ -+ data = ((struct tty_log_buf) { .what = TTY_LOG_EXEC, -+ .tty = (unsigned long) tty, -+ .len = len, -+ .direction = 0, -+ .sec = tv.tv_sec, -+ .usec = tv.tv_usec } ); -+ os_write_file(tty_log_fd, &data, sizeof(data)); -+ -+ for(ptr = argv; ; ptr++){ -+ if(copy_from_user_proc(&arg, ptr, sizeof(arg))) -+ return; -+ if(arg == NULL) break; -+ log_chunk(tty_log_fd, arg, strlen_user_proc(arg)); -+ } -+} -+ -+extern void register_tty_logger(int (*opener)(void *, void *), -+ int (*writer)(int, const char *, int, -+ void *, int), -+ void (*closer)(int, void *)); -+ -+static int register_logger(void) -+{ -+ register_tty_logger(open_tty_log, write_tty_log, close_tty_log); -+ return(0); -+} -+ -+__uml_initcall(register_logger); -+ - static int __init set_tty_log_dir(char *name, int *add) - { - tty_log_dir = name; -@@ -104,7 +202,7 @@ - - tty_log_fd = strtoul(name, &end, 0); - if((*end != '\0') || (end == name)){ -- printk("set_tty_log_fd - strtoul failed on '%s'\n", name); -+ printf("set_tty_log_fd - strtoul failed on '%s'\n", name); - tty_log_fd = -1; - } - return 0; -Index: uml-2.6.7/arch/um/kernel/process_kern.c -=================================================================== ---- uml-2.6.7.orig/arch/um/kernel/process_kern.c 2004-07-16 19:36:45.946161584 +0300 -+++ 
uml-2.6.7/arch/um/kernel/process_kern.c 2004-07-16 19:47:23.723204736 +0300 -@@ -16,6 +16,7 @@ - #include "linux/module.h" - #include "linux/init.h" - #include "linux/capability.h" -+#include "linux/spinlock.h" - #include "asm/unistd.h" - #include "asm/mman.h" - #include "asm/segment.h" -@@ -23,7 +24,6 @@ - #include "asm/pgtable.h" - #include "asm/processor.h" - #include "asm/tlbflush.h" --#include "asm/spinlock.h" - #include "asm/uaccess.h" - #include "asm/user.h" - #include "user_util.h" -@@ -52,17 +52,12 @@ - - struct task_struct *get_task(int pid, int require) - { -- struct task_struct *task, *ret; -+ struct task_struct *ret; - -- ret = NULL; - read_lock(&tasklist_lock); -- for_each_process(task){ -- if(task->pid == pid){ -- ret = task; -- break; -- } -- } -+ ret = find_task_by_pid(pid); - read_unlock(&tasklist_lock); -+ - if(require && (ret == NULL)) panic("get_task couldn't find a task\n"); - return(ret); - } -@@ -95,7 +90,8 @@ - int flags = GFP_KERNEL; - - if(atomic) flags |= GFP_ATOMIC; -- if((page = __get_free_pages(flags, order)) == 0) -+ page = __get_free_pages(flags, order); -+ if(page == 0) - return(0); - stack_protections(page); - return(page); -@@ -103,13 +99,15 @@ - - int kernel_thread(int (*fn)(void *), void * arg, unsigned long flags) - { -- struct task_struct *p; -+ int pid; - - current->thread.request.u.thread.proc = fn; - current->thread.request.u.thread.arg = arg; -- p = do_fork(CLONE_VM | flags, 0, NULL, 0, NULL, NULL); -- if(IS_ERR(p)) panic("do_fork failed in kernel_thread"); -- return(p->pid); -+ pid = do_fork(CLONE_VM | CLONE_UNTRACED | flags, 0, NULL, 0, NULL, -+ NULL); -+ if(pid < 0) -+ panic("do_fork failed in kernel_thread, errno = %d", pid); -+ return(pid); - } - - void switch_mm(struct mm_struct *prev, struct mm_struct *next, -@@ -129,7 +127,7 @@ - { external_pid(task), task }); - } - --void *switch_to(void *prev, void *next, void *last) -+void *_switch_to(void *prev, void *next, void *last) - { - 
return(CHOOSE_MODE(switch_to_tt(prev, next), - switch_to_skas(prev, next))); -@@ -149,7 +147,7 @@ - void exit_thread(void) - { - CHOOSE_MODE(exit_thread_tt(), exit_thread_skas()); -- unprotect_stack((unsigned long) current->thread_info); -+ unprotect_stack((unsigned long) current_thread); - } - - void *get_current(void) -@@ -157,6 +155,10 @@ - return(current); - } - -+void prepare_to_copy(struct task_struct *tsk) -+{ -+} -+ - int copy_thread(int nr, unsigned long clone_flags, unsigned long sp, - unsigned long stack_top, struct task_struct * p, - struct pt_regs *regs) -@@ -190,7 +192,7 @@ - - void default_idle(void) - { -- idle_timer(); -+ uml_idle_timer(); - - atomic_inc(&init_mm.mm_count); - current->mm = &init_mm; -@@ -367,10 +369,15 @@ - return(clear_user(buf, size)); - } - -+int strlen_user_proc(char *str) -+{ -+ return(strlen_user(str)); -+} -+ - int smp_sigio_handler(void) - { - #ifdef CONFIG_SMP -- int cpu = current->thread_info->cpu; -+ int cpu = current_thread->cpu; - IPI_handler(cpu); - if(cpu != 0) - return(1); -@@ -385,7 +392,7 @@ - - int cpu(void) - { -- return(current->thread_info->cpu); -+ return(current_thread->cpu); - } - - /* -Index: uml-2.6.7/arch/um/config.release -=================================================================== ---- uml-2.6.7.orig/arch/um/config.release 2004-07-16 19:37:40.174917560 +0300 -+++ uml-2.6.7/arch/um/config.release 2004-07-16 19:47:23.675212032 +0300 -@@ -228,7 +228,6 @@ - CONFIG_EXT2_FS=y - CONFIG_SYSV_FS=m - CONFIG_UDF_FS=m --# CONFIG_UDF_RW is not set - CONFIG_UFS_FS=m - # CONFIG_UFS_FS_WRITE is not set - -Index: uml-2.6.7/fs/hostfs/hostfs_kern.c -=================================================================== ---- uml-2.6.7.orig/fs/hostfs/hostfs_kern.c 2004-07-16 19:47:23.631218720 +0300 -+++ uml-2.6.7/fs/hostfs/hostfs_kern.c 2004-07-16 19:47:24.263122656 +0300 -@@ -0,0 +1,1022 @@ -+/* -+ * Copyright (C) 2000, 2001, 2002 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ * -+ * Ported the 
filesystem routines to 2.5. -+ * 2003-02-10 Petr Baudis -+ */ -+ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include "hostfs.h" -+#include "kern_util.h" -+#include "kern.h" -+#include "user_util.h" -+#include "2_5compat.h" -+#include "init.h" -+ -+struct hostfs_inode_info { -+ char *host_filename; -+ int fd; -+ int mode; -+ struct inode vfs_inode; -+}; -+ -+static inline struct hostfs_inode_info *HOSTFS_I(struct inode *inode) -+{ -+ return(list_entry(inode, struct hostfs_inode_info, vfs_inode)); -+} -+ -+#define FILE_HOSTFS_I(file) HOSTFS_I((file)->f_dentry->d_inode) -+ -+int hostfs_d_delete(struct dentry *dentry) -+{ -+ return(1); -+} -+ -+struct dentry_operations hostfs_dentry_ops = { -+ .d_delete = hostfs_d_delete, -+}; -+ -+/* Changed in hostfs_args before the kernel starts running */ -+static char *root_ino = "/"; -+static int append = 0; -+ -+#define HOSTFS_SUPER_MAGIC 0x00c0ffee -+ -+static struct inode_operations hostfs_iops; -+static struct inode_operations hostfs_dir_iops; -+static struct address_space_operations hostfs_link_aops; -+ -+#ifndef MODULE -+static int __init hostfs_args(char *options, int *add) -+{ -+ char *ptr; -+ -+ ptr = strchr(options, ','); -+ if(ptr != NULL) -+ *ptr++ = '\0'; -+ if(*options != '\0') -+ root_ino = options; -+ -+ options = ptr; -+ while(options){ -+ ptr = strchr(options, ','); -+ if(ptr != NULL) -+ *ptr++ = '\0'; -+ if(*options != '\0'){ -+ if(!strcmp(options, "append")) -+ append = 1; -+ else printf("hostfs_args - unsupported option - %s\n", -+ options); -+ } -+ options = ptr; -+ } -+ return(0); -+} -+ -+__uml_setup("hostfs=", hostfs_args, -+"hostfs=,,...\n" -+" This is used to set hostfs parameters. The root directory argument\n" -+" is used to confine all hostfs mounts to within the specified directory\n" -+" tree on the host. 
If this isn't specified, then a user inside UML can\n" -+" mount anything on the host that's accessible to the user that's running\n" -+" it.\n" -+" The only flag currently supported is 'append', which specifies that all\n" -+" files opened by hostfs will be opened in append mode.\n\n" -+); -+#endif -+ -+static char *dentry_name(struct dentry *dentry, int extra) -+{ -+ struct dentry *parent; -+ char *root, *name; -+ int len; -+ -+ len = 0; -+ parent = dentry; -+ while(parent->d_parent != parent){ -+ len += parent->d_name.len + 1; -+ parent = parent->d_parent; -+ } -+ -+ root = HOSTFS_I(parent->d_inode)->host_filename; -+ len += strlen(root); -+ name = kmalloc(len + extra + 1, GFP_KERNEL); -+ if(name == NULL) return(NULL); -+ -+ name[len] = '\0'; -+ parent = dentry; -+ while(parent->d_parent != parent){ -+ len -= parent->d_name.len + 1; -+ name[len] = '/'; -+ strncpy(&name[len + 1], parent->d_name.name, -+ parent->d_name.len); -+ parent = parent->d_parent; -+ } -+ strncpy(name, root, strlen(root)); -+ return(name); -+} -+ -+static char *inode_name(struct inode *ino, int extra) -+{ -+ struct dentry *dentry; -+ -+ dentry = list_entry(ino->i_dentry.next, struct dentry, d_alias); -+ return(dentry_name(dentry, extra)); -+} -+ -+static int read_name(struct inode *ino, char *name) -+{ -+ /* The non-int inode fields are copied into ints by stat_file and -+ * then copied into the inode because passing the actual pointers -+ * in and having them treated as int * breaks on big-endian machines -+ */ -+ int err; -+ int i_mode, i_nlink, i_blksize; -+ unsigned long long i_size; -+ unsigned long long i_ino; -+ unsigned long long i_blocks; -+ -+ err = stat_file(name, &i_ino, &i_mode, &i_nlink, &ino->i_uid, -+ &ino->i_gid, &i_size, &ino->i_atime, &ino->i_mtime, -+ &ino->i_ctime, &i_blksize, &i_blocks); -+ if(err) -+ return(err); -+ -+ ino->i_ino = i_ino; -+ ino->i_mode = i_mode; -+ ino->i_nlink = i_nlink; -+ ino->i_size = i_size; -+ ino->i_blksize = i_blksize; -+ ino->i_blocks = 
i_blocks; -+ if((ino->i_sb->s_dev == ROOT_DEV) && (ino->i_uid == getuid())) -+ ino->i_uid = 0; -+ return(0); -+} -+ -+static char *follow_link(char *link) -+{ -+ int len, n; -+ char *name, *resolved, *end; -+ -+ len = 64; -+ while(1){ -+ n = -ENOMEM; -+ name = kmalloc(len, GFP_KERNEL); -+ if(name == NULL) -+ goto out; -+ -+ n = do_readlink(link, name, len); -+ if(n < len) -+ break; -+ len *= 2; -+ kfree(name); -+ } -+ if(n < 0) -+ goto out_free; -+ -+ if(*name == '/') -+ return(name); -+ -+ end = strrchr(link, '/'); -+ if(end == NULL) -+ return(name); -+ -+ *(end + 1) = '\0'; -+ len = strlen(link) + strlen(name) + 1; -+ -+ resolved = kmalloc(len, GFP_KERNEL); -+ if(resolved == NULL){ -+ n = -ENOMEM; -+ goto out_free; -+ } -+ -+ sprintf(resolved, "%s%s", link, name); -+ kfree(name); -+ kfree(link); -+ return(resolved); -+ -+ out_free: -+ kfree(name); -+ out: -+ return(ERR_PTR(n)); -+} -+ -+static int read_inode(struct inode *ino) -+{ -+ char *name; -+ int err = 0; -+ -+ /* Unfortunately, we are called from iget() when we don't have a dentry -+ * allocated yet. 
-+ */ -+ if(list_empty(&ino->i_dentry)) -+ goto out; -+ -+ err = -ENOMEM; -+ name = inode_name(ino, 0); -+ if(name == NULL) -+ goto out; -+ -+ if(file_type(name, NULL) == OS_TYPE_SYMLINK){ -+ name = follow_link(name); -+ if(IS_ERR(name)){ -+ err = PTR_ERR(name); -+ goto out; -+ } -+ } -+ -+ err = read_name(ino, name); -+ kfree(name); -+ out: -+ return(err); -+} -+ -+int hostfs_statfs(struct super_block *sb, struct kstatfs *sf) -+{ -+ /* do_statfs uses struct statfs64 internally, but the linux kernel -+ * struct statfs still has 32-bit versions for most of these fields, -+ * so we convert them here -+ */ -+ int err; -+ long long f_blocks; -+ long long f_bfree; -+ long long f_bavail; -+ long long f_files; -+ long long f_ffree; -+ -+ err = do_statfs(HOSTFS_I(sb->s_root->d_inode)->host_filename, -+ &sf->f_bsize, &f_blocks, &f_bfree, &f_bavail, &f_files, -+ &f_ffree, &sf->f_fsid, sizeof(sf->f_fsid), -+ &sf->f_namelen, sf->f_spare); -+ if(err) return(err); -+ sf->f_blocks = f_blocks; -+ sf->f_bfree = f_bfree; -+ sf->f_bavail = f_bavail; -+ sf->f_files = f_files; -+ sf->f_ffree = f_ffree; -+ sf->f_type = HOSTFS_SUPER_MAGIC; -+ return(0); -+} -+ -+static struct inode *hostfs_alloc_inode(struct super_block *sb) -+{ -+ struct hostfs_inode_info *hi; -+ -+ hi = kmalloc(sizeof(*hi), GFP_KERNEL); -+ if(hi == NULL) -+ return(NULL); -+ -+ *hi = ((struct hostfs_inode_info) { .host_filename = NULL, -+ .fd = -1, -+ .mode = 0 }); -+ inode_init_once(&hi->vfs_inode); -+ return(&hi->vfs_inode); -+} -+ -+static void hostfs_delete_inode(struct inode *inode) -+{ -+ if(HOSTFS_I(inode)->fd != -1) { -+ close_file(&HOSTFS_I(inode)->fd); -+ HOSTFS_I(inode)->fd = -1; -+ } -+ clear_inode(inode); -+} -+ -+static void hostfs_destroy_inode(struct inode *inode) -+{ -+ if(HOSTFS_I(inode)->host_filename) -+ kfree(HOSTFS_I(inode)->host_filename); -+ -+ if(HOSTFS_I(inode)->fd != -1) { -+ close_file(&HOSTFS_I(inode)->fd); -+ } -+ -+ kfree(HOSTFS_I(inode)); -+} -+ -+static void hostfs_read_inode(struct 
inode *inode) -+{ -+ read_inode(inode); -+} -+ -+static struct super_operations hostfs_sbops = { -+ .alloc_inode = hostfs_alloc_inode, -+ .drop_inode = generic_delete_inode, -+ .delete_inode = hostfs_delete_inode, -+ .destroy_inode = hostfs_destroy_inode, -+ .read_inode = hostfs_read_inode, -+ .statfs = hostfs_statfs, -+}; -+ -+int hostfs_readdir(struct file *file, void *ent, filldir_t filldir) -+{ -+ void *dir; -+ char *name; -+ unsigned long long next, ino; -+ int error, len; -+ -+ name = dentry_name(file->f_dentry, 0); -+ if(name == NULL) return(-ENOMEM); -+ dir = open_dir(name, &error); -+ kfree(name); -+ if(dir == NULL) return(-error); -+ next = file->f_pos; -+ while((name = read_dir(dir, &next, &ino, &len)) != NULL){ -+ error = (*filldir)(ent, name, len, file->f_pos, -+ ino, DT_UNKNOWN); -+ if(error) break; -+ file->f_pos = next; -+ } -+ close_dir(dir); -+ return(0); -+} -+ -+int hostfs_file_open(struct inode *ino, struct file *file) -+{ -+ char *name; -+ int mode = 0, r = 0, w = 0, fd; -+ -+ mode = file->f_mode & (FMODE_READ | FMODE_WRITE); -+ if((mode & HOSTFS_I(ino)->mode) == mode) -+ return(0); -+ -+ /* The file may already have been opened, but with the wrong access, -+ * so this resets things and reopens the file with the new access. 
-+ */ -+ if(HOSTFS_I(ino)->fd != -1){ -+ close_file(&HOSTFS_I(ino)->fd); -+ HOSTFS_I(ino)->fd = -1; -+ } -+ -+ HOSTFS_I(ino)->mode |= mode; -+ if(HOSTFS_I(ino)->mode & FMODE_READ) -+ r = 1; -+ if(HOSTFS_I(ino)->mode & FMODE_WRITE) -+ w = 1; -+ if(w) -+ r = 1; -+ -+ name = dentry_name(file->f_dentry, 0); -+ if(name == NULL) -+ return(-ENOMEM); -+ -+ fd = open_file(name, r, w, append); -+ kfree(name); -+ if(fd < 0) return(fd); -+ FILE_HOSTFS_I(file)->fd = fd; -+ -+ return(0); -+} -+ -+int hostfs_fsync(struct file *file, struct dentry *dentry, int datasync) -+{ -+ return(0); -+} -+ -+static struct file_operations hostfs_file_fops = { -+ .llseek = generic_file_llseek, -+ .read = generic_file_read, -+ .write = generic_file_write, -+ .mmap = generic_file_mmap, -+ .open = hostfs_file_open, -+ .release = NULL, -+ .fsync = hostfs_fsync, -+}; -+ -+static struct file_operations hostfs_dir_fops = { -+ .readdir = hostfs_readdir, -+ .read = generic_read_dir, -+}; -+ -+int hostfs_writepage(struct page *page, struct writeback_control *wbc) -+{ -+ struct address_space *mapping = page->mapping; -+ struct inode *inode = mapping->host; -+ char *buffer; -+ unsigned long long base; -+ int count = PAGE_CACHE_SIZE; -+ int end_index = inode->i_size >> PAGE_CACHE_SHIFT; -+ int err; -+ -+ if (page->index >= end_index) -+ count = inode->i_size & (PAGE_CACHE_SIZE-1); -+ -+ buffer = kmap(page); -+ base = ((unsigned long long) page->index) << PAGE_CACHE_SHIFT; -+ -+ err = write_file(HOSTFS_I(inode)->fd, &base, buffer, count); -+ if(err != count){ -+ ClearPageUptodate(page); -+ goto out; -+ } -+ -+ if (base > inode->i_size) -+ inode->i_size = base; -+ -+ if (PageError(page)) -+ ClearPageError(page); -+ err = 0; -+ -+ out: -+ kunmap(page); -+ -+ unlock_page(page); -+ return err; -+} -+ -+int hostfs_readpage(struct file *file, struct page *page) -+{ -+ char *buffer; -+ long long start; -+ int err = 0; -+ -+ start = (long long) page->index << PAGE_CACHE_SHIFT; -+ buffer = kmap(page); -+ err = 
read_file(FILE_HOSTFS_I(file)->fd, &start, buffer, -+ PAGE_CACHE_SIZE); -+ if(err < 0) goto out; -+ -+ memset(&buffer[err], 0, PAGE_CACHE_SIZE - err); -+ -+ flush_dcache_page(page); -+ SetPageUptodate(page); -+ if (PageError(page)) ClearPageError(page); -+ err = 0; -+ out: -+ kunmap(page); -+ unlock_page(page); -+ return(err); -+} -+ -+int hostfs_prepare_write(struct file *file, struct page *page, -+ unsigned int from, unsigned int to) -+{ -+ char *buffer; -+ long long start, tmp; -+ int err; -+ -+ start = (long long) page->index << PAGE_CACHE_SHIFT; -+ buffer = kmap(page); -+ if(from != 0){ -+ tmp = start; -+ err = read_file(FILE_HOSTFS_I(file)->fd, &tmp, buffer, -+ from); -+ if(err < 0) goto out; -+ } -+ if(to != PAGE_CACHE_SIZE){ -+ start += to; -+ err = read_file(FILE_HOSTFS_I(file)->fd, &start, buffer + to, -+ PAGE_CACHE_SIZE - to); -+ if(err < 0) goto out; -+ } -+ err = 0; -+ out: -+ kunmap(page); -+ return(err); -+} -+ -+int hostfs_commit_write(struct file *file, struct page *page, unsigned from, -+ unsigned to) -+{ -+ struct address_space *mapping = page->mapping; -+ struct inode *inode = mapping->host; -+ char *buffer; -+ long long start; -+ int err = 0; -+ -+ start = (long long) (page->index << PAGE_CACHE_SHIFT) + from; -+ buffer = kmap(page); -+ err = write_file(FILE_HOSTFS_I(file)->fd, &start, buffer + from, -+ to - from); -+ if(err > 0) err = 0; -+ if(!err && (start > inode->i_size)) -+ inode->i_size = start; -+ -+ kunmap(page); -+ return(err); -+} -+ -+static struct address_space_operations hostfs_aops = { -+ .writepage = hostfs_writepage, -+ .readpage = hostfs_readpage, -+/* .set_page_dirty = __set_page_dirty_nobuffers, */ -+ .prepare_write = hostfs_prepare_write, -+ .commit_write = hostfs_commit_write -+}; -+ -+static int init_inode(struct inode *inode, struct dentry *dentry) -+{ -+ char *name; -+ int type, err = -ENOMEM, rdev; -+ -+ if(dentry){ -+ name = dentry_name(dentry, 0); -+ if(name == NULL) -+ goto out; -+ type = file_type(name, &rdev); -+ 
kfree(name); -+ } -+ else type = OS_TYPE_DIR; -+ -+ err = 0; -+ if(type == OS_TYPE_SYMLINK) -+ inode->i_op = &page_symlink_inode_operations; -+ else if(type == OS_TYPE_DIR) -+ inode->i_op = &hostfs_dir_iops; -+ else inode->i_op = &hostfs_iops; -+ -+ if(type == OS_TYPE_DIR) inode->i_fop = &hostfs_dir_fops; -+ else inode->i_fop = &hostfs_file_fops; -+ -+ if(type == OS_TYPE_SYMLINK) -+ inode->i_mapping->a_ops = &hostfs_link_aops; -+ else inode->i_mapping->a_ops = &hostfs_aops; -+ -+ switch (type) { -+ case OS_TYPE_CHARDEV: -+ init_special_inode(inode, S_IFCHR, rdev); -+ break; -+ case OS_TYPE_BLOCKDEV: -+ init_special_inode(inode, S_IFBLK, rdev); -+ break; -+ case OS_TYPE_FIFO: -+ init_special_inode(inode, S_IFIFO, 0); -+ break; -+ case OS_TYPE_SOCK: -+ init_special_inode(inode, S_IFSOCK, 0); -+ break; -+ } -+ out: -+ return(err); -+} -+ -+int hostfs_create(struct inode *dir, struct dentry *dentry, int mode, -+ struct nameidata *nd) -+{ -+ struct inode *inode; -+ char *name; -+ int error, fd; -+ -+ error = -ENOMEM; -+ inode = iget(dir->i_sb, 0); -+ if(inode == NULL) goto out; -+ -+ error = init_inode(inode, dentry); -+ if(error) -+ goto out_put; -+ -+ error = -ENOMEM; -+ name = dentry_name(dentry, 0); -+ if(name == NULL) -+ goto out_put; -+ -+ fd = file_create(name, -+ mode & S_IRUSR, mode & S_IWUSR, mode & S_IXUSR, -+ mode & S_IRGRP, mode & S_IWGRP, mode & S_IXGRP, -+ mode & S_IROTH, mode & S_IWOTH, mode & S_IXOTH); -+ if(fd < 0) -+ error = fd; -+ else error = read_name(inode, name); -+ -+ kfree(name); -+ if(error) -+ goto out_put; -+ -+ HOSTFS_I(inode)->fd = fd; -+ HOSTFS_I(inode)->mode = FMODE_READ | FMODE_WRITE; -+ d_instantiate(dentry, inode); -+ return(0); -+ -+ out_put: -+ iput(inode); -+ out: -+ return(error); -+} -+ -+struct dentry *hostfs_lookup(struct inode *ino, struct dentry *dentry, -+ struct nameidata *nd) -+{ -+ struct inode *inode; -+ char *name; -+ int err; -+ -+ err = -ENOMEM; -+ inode = iget(ino->i_sb, 0); -+ if(inode == NULL) -+ goto out; -+ -+ 
err = init_inode(inode, dentry); -+ if(err) -+ goto out_put; -+ -+ err = -ENOMEM; -+ name = dentry_name(dentry, 0); -+ if(name == NULL) -+ goto out_put; -+ -+ err = read_name(inode, name); -+ kfree(name); -+ if(err == -ENOENT){ -+ iput(inode); -+ inode = NULL; -+ } -+ else if(err) -+ goto out_put; -+ -+ d_add(dentry, inode); -+ dentry->d_op = &hostfs_dentry_ops; -+ return(NULL); -+ -+ out_put: -+ iput(inode); -+ out: -+ return(ERR_PTR(err)); -+} -+ -+static char *inode_dentry_name(struct inode *ino, struct dentry *dentry) -+{ -+ char *file; -+ int len; -+ -+ file = inode_name(ino, dentry->d_name.len + 1); -+ if(file == NULL) return(NULL); -+ strcat(file, "/"); -+ len = strlen(file); -+ strncat(file, dentry->d_name.name, dentry->d_name.len); -+ file[len + dentry->d_name.len] = '\0'; -+ return(file); -+} -+ -+int hostfs_link(struct dentry *to, struct inode *ino, struct dentry *from) -+{ -+ char *from_name, *to_name; -+ int err; -+ -+ if((from_name = inode_dentry_name(ino, from)) == NULL) -+ return(-ENOMEM); -+ to_name = dentry_name(to, 0); -+ if(to_name == NULL){ -+ kfree(from_name); -+ return(-ENOMEM); -+ } -+ err = link_file(to_name, from_name); -+ kfree(from_name); -+ kfree(to_name); -+ return(err); -+} -+ -+int hostfs_unlink(struct inode *ino, struct dentry *dentry) -+{ -+ char *file; -+ int err; -+ -+ if((file = inode_dentry_name(ino, dentry)) == NULL) return(-ENOMEM); -+ if(append) -+ return(-EPERM); -+ -+ err = unlink_file(file); -+ kfree(file); -+ return(err); -+} -+ -+int hostfs_symlink(struct inode *ino, struct dentry *dentry, const char *to) -+{ -+ char *file; -+ int err; -+ -+ if((file = inode_dentry_name(ino, dentry)) == NULL) return(-ENOMEM); -+ err = make_symlink(file, to); -+ kfree(file); -+ return(err); -+} -+ -+int hostfs_mkdir(struct inode *ino, struct dentry *dentry, int mode) -+{ -+ char *file; -+ int err; -+ -+ if((file = inode_dentry_name(ino, dentry)) == NULL) return(-ENOMEM); -+ err = do_mkdir(file, mode); -+ kfree(file); -+ return(err); -+} 
-+ -+int hostfs_rmdir(struct inode *ino, struct dentry *dentry) -+{ -+ char *file; -+ int err; -+ -+ if((file = inode_dentry_name(ino, dentry)) == NULL) return(-ENOMEM); -+ err = do_rmdir(file); -+ kfree(file); -+ return(err); -+} -+ -+int hostfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t dev) -+{ -+ struct inode *inode; -+ char *name; -+ int err = -ENOMEM; -+ -+ inode = iget(dir->i_sb, 0); -+ if(inode == NULL) -+ goto out; -+ -+ err = init_inode(inode, dentry); -+ if(err) -+ goto out_put; -+ -+ err = -ENOMEM; -+ name = dentry_name(dentry, 0); -+ if(name == NULL) -+ goto out_put; -+ -+ init_special_inode(inode, mode, dev); -+ err = do_mknod(name, mode, dev); -+ if(err) -+ goto out_free; -+ -+ err = read_name(inode, name); -+ kfree(name); -+ if(err) -+ goto out_put; -+ -+ d_instantiate(dentry, inode); -+ return(0); -+ -+ out_free: -+ kfree(name); -+ out_put: -+ iput(inode); -+ out: -+ return(err); -+} -+ -+int hostfs_rename(struct inode *from_ino, struct dentry *from, -+ struct inode *to_ino, struct dentry *to) -+{ -+ char *from_name, *to_name; -+ int err; -+ -+ if((from_name = inode_dentry_name(from_ino, from)) == NULL) -+ return(-ENOMEM); -+ if((to_name = inode_dentry_name(to_ino, to)) == NULL){ -+ kfree(from_name); -+ return(-ENOMEM); -+ } -+ err = rename_file(from_name, to_name); -+ kfree(from_name); -+ kfree(to_name); -+ return(err); -+} -+ -+void hostfs_truncate(struct inode *ino) -+{ -+ not_implemented(); -+} -+ -+int hostfs_permission(struct inode *ino, int desired, struct nameidata *nd) -+{ -+ char *name; -+ int r = 0, w = 0, x = 0, err; -+ -+ if(desired & MAY_READ) r = 1; -+ if(desired & MAY_WRITE) w = 1; -+ if(desired & MAY_EXEC) x = 1; -+ name = inode_name(ino, 0); -+ if(name == NULL) return(-ENOMEM); -+ err = access_file(name, r, w, x); -+ kfree(name); -+ if(!err) err = vfs_permission(ino, desired); -+ return(err); -+} -+ -+int hostfs_setattr(struct dentry *dentry, struct iattr *attr) -+{ -+ struct hostfs_iattr attrs; -+ char *name; 
-+ int err; -+ -+ if(append) -+ attr->ia_valid &= ~ATTR_SIZE; -+ -+ attrs.ia_valid = 0; -+ if(attr->ia_valid & ATTR_MODE){ -+ attrs.ia_valid |= HOSTFS_ATTR_MODE; -+ attrs.ia_mode = attr->ia_mode; -+ } -+ if(attr->ia_valid & ATTR_UID){ -+ if((dentry->d_inode->i_sb->s_dev == ROOT_DEV) && -+ (attr->ia_uid == 0)) -+ attr->ia_uid = getuid(); -+ attrs.ia_valid |= HOSTFS_ATTR_UID; -+ attrs.ia_uid = attr->ia_uid; -+ } -+ if(attr->ia_valid & ATTR_GID){ -+ if((dentry->d_inode->i_sb->s_dev == ROOT_DEV) && -+ (attr->ia_gid == 0)) -+ attr->ia_gid = getuid(); -+ attrs.ia_valid |= HOSTFS_ATTR_GID; -+ attrs.ia_gid = attr->ia_gid; -+ } -+ if(attr->ia_valid & ATTR_SIZE){ -+ attrs.ia_valid |= HOSTFS_ATTR_SIZE; -+ attrs.ia_size = attr->ia_size; -+ } -+ if(attr->ia_valid & ATTR_ATIME){ -+ attrs.ia_valid |= HOSTFS_ATTR_ATIME; -+ attrs.ia_atime = attr->ia_atime; -+ } -+ if(attr->ia_valid & ATTR_MTIME){ -+ attrs.ia_valid |= HOSTFS_ATTR_MTIME; -+ attrs.ia_mtime = attr->ia_mtime; -+ } -+ if(attr->ia_valid & ATTR_CTIME){ -+ attrs.ia_valid |= HOSTFS_ATTR_CTIME; -+ attrs.ia_ctime = attr->ia_ctime; -+ } -+ if(attr->ia_valid & ATTR_ATIME_SET){ -+ attrs.ia_valid |= HOSTFS_ATTR_ATIME_SET; -+ } -+ if(attr->ia_valid & ATTR_MTIME_SET){ -+ attrs.ia_valid |= HOSTFS_ATTR_MTIME_SET; -+ } -+ name = dentry_name(dentry, 0); -+ if(name == NULL) return(-ENOMEM); -+ err = set_attr(name, &attrs); -+ kfree(name); -+ if(err) -+ return(err); -+ -+ return(inode_setattr(dentry->d_inode, attr)); -+} -+ -+int hostfs_getattr(struct vfsmount *mnt, struct dentry *dentry, -+ struct kstat *stat) -+{ -+ generic_fillattr(dentry->d_inode, stat); -+ return(0); -+} -+ -+static struct inode_operations hostfs_iops = { -+ .create = hostfs_create, -+ .link = hostfs_link, -+ .unlink = hostfs_unlink, -+ .symlink = hostfs_symlink, -+ .mkdir = hostfs_mkdir, -+ .rmdir = hostfs_rmdir, -+ .mknod = hostfs_mknod, -+ .rename = hostfs_rename, -+ .truncate = hostfs_truncate, -+ .permission = hostfs_permission, -+ .setattr = hostfs_setattr, -+ 
.getattr = hostfs_getattr, -+}; -+ -+static struct inode_operations hostfs_dir_iops = { -+ .create = hostfs_create, -+ .lookup = hostfs_lookup, -+ .link = hostfs_link, -+ .unlink = hostfs_unlink, -+ .symlink = hostfs_symlink, -+ .mkdir = hostfs_mkdir, -+ .rmdir = hostfs_rmdir, -+ .mknod = hostfs_mknod, -+ .rename = hostfs_rename, -+ .truncate = hostfs_truncate, -+ .permission = hostfs_permission, -+ .setattr = hostfs_setattr, -+ .getattr = hostfs_getattr, -+}; -+ -+int hostfs_link_readpage(struct file *file, struct page *page) -+{ -+ char *buffer, *name; -+ long long start; -+ int err; -+ -+ start = page->index << PAGE_CACHE_SHIFT; -+ buffer = kmap(page); -+ name = inode_name(page->mapping->host, 0); -+ if(name == NULL) return(-ENOMEM); -+ err = do_readlink(name, buffer, PAGE_CACHE_SIZE); -+ kfree(name); -+ if(err == PAGE_CACHE_SIZE) -+ err = -E2BIG; -+ else if(err > 0){ -+ flush_dcache_page(page); -+ SetPageUptodate(page); -+ if (PageError(page)) ClearPageError(page); -+ err = 0; -+ } -+ kunmap(page); -+ unlock_page(page); -+ return(err); -+} -+ -+static struct address_space_operations hostfs_link_aops = { -+ .readpage = hostfs_link_readpage, -+}; -+ -+static int hostfs_fill_sb_common(struct super_block *sb, void *d, int silent) -+{ -+ struct inode *root_inode; -+ char *name, *data = d; -+ int err; -+ -+ sb->s_blocksize = 1024; -+ sb->s_blocksize_bits = 10; -+ sb->s_magic = HOSTFS_SUPER_MAGIC; -+ sb->s_op = &hostfs_sbops; -+ -+ if((data == NULL) || (*data == '\0')) -+ data = root_ino; -+ -+ err = -ENOMEM; -+ name = kmalloc(strlen(data) + 1, GFP_KERNEL); -+ if(name == NULL) -+ goto out; -+ -+ strcpy(name, data); -+ -+ root_inode = iget(sb, 0); -+ if(root_inode == NULL) -+ goto out_free; -+ -+ err = init_inode(root_inode, NULL); -+ if(err) -+ goto out_put; -+ -+ HOSTFS_I(root_inode)->host_filename = name; -+ -+ err = -ENOMEM; -+ sb->s_root = d_alloc_root(root_inode); -+ if(sb->s_root == NULL) -+ goto out_put; -+ -+ err = read_inode(root_inode); -+ if(err) -+ goto 
out_put; -+ -+ return(0); -+ -+ out_put: -+ iput(root_inode); -+ out_free: -+ kfree(name); -+ out: -+ return(err); -+} -+ -+static struct super_block *hostfs_read_sb(struct file_system_type *type, -+ int flags, const char *dev_name, -+ void *data) -+{ -+ return(get_sb_nodev(type, flags, data, hostfs_fill_sb_common)); -+} -+ -+static struct file_system_type hostfs_type = { -+ .owner = THIS_MODULE, -+ .name = "hostfs", -+ .get_sb = hostfs_read_sb, -+ .kill_sb = kill_anon_super, -+ .fs_flags = 0, -+}; -+ -+static int __init init_hostfs(void) -+{ -+ return(register_filesystem(&hostfs_type)); -+} -+ -+static void __exit exit_hostfs(void) -+{ -+ unregister_filesystem(&hostfs_type); -+} -+ -+module_init(init_hostfs) -+module_exit(exit_hostfs) -+MODULE_LICENSE("GPL"); -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. 
-+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -Index: uml-2.6.7/include/asm-um/module-generic.h -=================================================================== ---- uml-2.6.7.orig/include/asm-um/module-generic.h 2004-07-16 19:47:23.634218264 +0300 -+++ uml-2.6.7/include/asm-um/module-generic.h 2004-07-16 19:47:23.791194400 +0300 -@@ -0,0 +1,6 @@ -+#ifndef __UM_MODULE_GENERIC_H -+#define __UM_MODULE_GENERIC_H -+ -+#include "asm/arch/module.h" -+ -+#endif -Index: uml-2.6.7/arch/um/include/user_util.h -=================================================================== ---- uml-2.6.7.orig/arch/um/include/user_util.h 2004-07-16 19:36:02.218809152 +0300 -+++ uml-2.6.7/arch/um/include/user_util.h 2004-07-16 19:47:24.794041944 +0300 -@@ -14,8 +14,6 @@ - extern int unlockpt(int __fd); - extern char *ptsname(int __fd); - --enum { OP_NONE, OP_EXEC, OP_FORK, OP_TRACE_ON, OP_REBOOT, OP_HALT, OP_CB }; -- - struct cpu_task { - int pid; - void *task; -@@ -59,13 +57,11 @@ - extern void *add_signal_handler(int sig, void (*handler)(int)); - extern int start_fork_tramp(void *arg, unsigned long temp_stack, - int clone_flags, int (*tramp)(void *)); --extern int clone_and_wait(int (*fn)(void *), void *arg, void *sp, int flags); - extern int linux_main(int argc, char **argv); - extern void set_cmdline(char *cmd); - extern void input_cb(void (*proc)(void *), void *arg, int arg_len); - extern int get_pty(void); - extern void *um_kmalloc(int size); --extern int raw(int fd, int complain); - extern int switcheroo(int fd, int prot, void *from, void *to, int size); - extern void setup_machinename(char *machine_out); - extern void setup_hostinfo(void); -@@ -86,11 +82,17 @@ - extern int run_kernel_thread(int (*fn)(void *), void *arg, void **jmp_ptr); - extern void write_sigio_workaround(void); - extern void arch_check_bugs(void); -+extern int cpu_feature(char *what, char *buf, int len); - 
extern int arch_handle_signal(int sig, union uml_pt_regs *regs); - extern int arch_fixup(unsigned long address, void *sc_ptr); - extern void forward_pending_sigio(int target); - extern int can_do_skas(void); -- -+extern void arch_init_thread(void); -+ -+extern int __raw(int fd, int complain, int now); -+#define raw(fd, complain) __raw((fd), (complain), 1) -+ -+#define CATCH_EINTR(expr) while ( ((expr) < 0) && errno == EINTR) - #endif - - /* -Index: uml-2.6.7/include/asm-um/cpufeature.h -=================================================================== ---- uml-2.6.7.orig/include/asm-um/cpufeature.h 2004-07-16 19:47:23.633218416 +0300 -+++ uml-2.6.7/include/asm-um/cpufeature.h 2004-07-16 19:47:23.788194856 +0300 -@@ -0,0 +1,6 @@ -+#ifndef __UM_CPUFEATURE_H -+#define __UM_CPUFEATURE_H -+ -+#include "asm/arch/cpufeature.h" -+ -+#endif -Index: uml-2.6.7/arch/um/kernel/uaccess_user.c -=================================================================== ---- uml-2.6.7.orig/arch/um/kernel/uaccess_user.c 2004-07-16 19:37:08.883674552 +0300 -+++ uml-2.6.7/arch/um/kernel/uaccess_user.c 2004-07-16 19:47:23.754200024 +0300 -@@ -20,7 +20,7 @@ - - jmp_buf jbuf; - *fault_catcher = &jbuf; -- if(setjmp(jbuf) == 0){ -+ if(sigsetjmp(jbuf, 1) == 0){ - (*op)(to, from, n); - ret = 0; - *faulted_out = 0; -Index: uml-2.6.7/arch/um/kernel/tt/exec_kern.c -=================================================================== ---- uml-2.6.7.orig/arch/um/kernel/tt/exec_kern.c 2004-07-16 19:36:10.502549832 +0300 -+++ uml-2.6.7/arch/um/kernel/tt/exec_kern.c 2004-07-16 19:47:23.743201696 +0300 -@@ -17,6 +17,7 @@ - #include "mem_user.h" - #include "os.h" - #include "tlb.h" -+#include "mode.h" - - static int exec_tramp(void *sig_stack) - { -@@ -47,17 +48,17 @@ - do_exit(SIGKILL); - } - -- if(current->thread_info->cpu == 0) -+ if(current_thread->cpu == 0) - forward_interrupts(new_pid); - current->thread.request.op = OP_EXEC; - current->thread.request.u.exec.pid = new_pid; -- unprotect_stack((unsigned 
long) current->thread_info); -+ unprotect_stack((unsigned long) current_thread); - os_usr1_process(os_getpid()); - - enable_timer(); - free_page(stack); - protect_memory(uml_reserved, high_physmem - uml_reserved, 1, 1, 0, 1); -- task_protections((unsigned long) current->thread_info); -+ task_protections((unsigned long) current_thread); - force_flush_all(); - unblock_signals(); - } -Index: uml-2.6.7/arch/um/kernel/frame.c -=================================================================== ---- uml-2.6.7.orig/arch/um/kernel/frame.c 2004-07-16 19:36:25.094331544 +0300 -+++ uml-2.6.7/arch/um/kernel/frame.c 2004-07-16 19:47:24.796041640 +0300 -@@ -21,6 +21,7 @@ - #include "sysdep/sigcontext.h" - #include "frame_user.h" - #include "kern_util.h" -+#include "user_util.h" - #include "ptrace_user.h" - #include "os.h" - -@@ -40,7 +41,7 @@ - /* Wait for it to stop itself and continue it with a SIGUSR1 to force - * it into the signal handler. - */ -- n = waitpid(pid, &status, WUNTRACED); -+ CATCH_EINTR(n = waitpid(pid, &status, WUNTRACED)); - if(n < 0){ - printf("capture_stack : waitpid failed - errno = %d\n", errno); - exit(1); -@@ -60,7 +61,7 @@ - * At this point, the handler has stuffed the addresses of - * sig, sc, and SA_RESTORER in raw. 
- */ -- n = waitpid(pid, &status, WUNTRACED); -+ CATCH_EINTR(n = waitpid(pid, &status, WUNTRACED)); - if(n < 0){ - printf("capture_stack : waitpid failed - errno = %d\n", errno); - exit(1); -@@ -82,7 +83,8 @@ - errno); - exit(1); - } -- if(waitpid(pid, &status, 0) < 0){ -+ CATCH_EINTR(n = waitpid(pid, &status, 0)); -+ if(n < 0){ - printf("capture_stack : waitpid failed - errno = %d\n", errno); - exit(1); - } -@@ -279,7 +281,7 @@ - struct sc_frame_raw raw_sc; - struct si_frame_raw raw_si; - void *stack, *sigstack; -- unsigned long top, sig_top, base; -+ unsigned long top, base; - - stack = mmap(NULL, PAGE_SIZE, PROT_READ | PROT_WRITE | PROT_EXEC, - MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); -@@ -292,7 +294,6 @@ - } - - top = (unsigned long) stack + PAGE_SIZE - sizeof(void *); -- sig_top = (unsigned long) sigstack + PAGE_SIZE; - - /* Get the sigcontext, no sigrestorer layout */ - raw_sc.restorer = 0; -Index: uml-2.6.7/include/asm-um/unistd.h -=================================================================== ---- uml-2.6.7.orig/include/asm-um/unistd.h 2004-07-16 19:37:20.150961664 +0300 -+++ uml-2.6.7/include/asm-um/unistd.h 2004-07-16 19:47:23.804192424 +0300 -@@ -48,7 +48,10 @@ - set_fs(KERNEL_DS); \ - ret = sys(args); \ - set_fs(fs); \ -- return ret; -+ if (ret >= 0) \ -+ return ret; \ -+ errno = -(long)ret; \ -+ return -1; - - static inline long open(const char *pathname, int flags, int mode) - { -Index: uml-2.6.7/arch/um/include/mem_kern.h -=================================================================== ---- uml-2.6.7.orig/arch/um/include/mem_kern.h 2004-07-16 19:47:23.613221456 +0300 -+++ uml-2.6.7/arch/um/include/mem_kern.h 2004-07-16 19:47:23.704207624 +0300 -@@ -0,0 +1,30 @@ -+/* -+ * Copyright (C) 2003 Jeff Dike (jdike@addtoit.com) -+ * Licensed under the GPL -+ */ -+ -+#ifndef __MEM_KERN_H__ -+#define __MEM_KERN_H__ -+ -+#include "linux/list.h" -+#include "linux/types.h" -+ -+struct remapper { -+ struct list_head list; -+ int (*proc)(int, unsigned long, 
int, __u64); -+}; -+ -+extern void register_remapper(struct remapper *info); -+ -+#endif -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -Index: uml-2.6.7/arch/um/dyn.lds.S -=================================================================== ---- uml-2.6.7.orig/arch/um/dyn.lds.S 2004-07-16 19:36:43.059600408 +0300 -+++ uml-2.6.7/arch/um/dyn.lds.S 2004-07-16 19:47:23.700208232 +0300 -@@ -1,3 +1,5 @@ -+#include -+ - OUTPUT_FORMAT(ELF_FORMAT) - OUTPUT_ARCH(ELF_ARCH) - ENTRY(_start) -@@ -10,12 +12,15 @@ - { - . = START + SIZEOF_HEADERS; - .interp : { *(.interp) } -- . = ALIGN(4096); - __binary_start = .; - . = ALIGN(4096); /* Init code and data */ - _stext = .; - __init_begin = .; -- .text.init : { *(.text.init) } -+ .init.text : { -+ _sinittext = .; -+ *(.init.text) -+ _einittext = .; -+ } - - . = ALIGN(4096); - -@@ -55,7 +60,9 @@ - } =0x90909090 - .plt : { *(.plt) } - .text : { -- *(.text .stub .text.* .gnu.linkonce.t.*) -+ *(.text) -+ SCHED_TEXT -+ *(.stub .text.* .gnu.linkonce.t.*) - /* .gnu.warning sections are handled specially by elf32.em. */ - *(.gnu.warning) - } =0x90909090 -@@ -67,7 +74,7 @@ - - #include "asm/common.lds.S" - -- .data.init : { *(.data.init) } -+ init.data : { *(.init.data) } - - /* Ensure the __preinit_array_start label is properly aligned. 
We - could instead move the label definition inside the section, but -Index: uml-2.6.7/arch/um/kernel/trap_user.c -=================================================================== ---- uml-2.6.7.orig/arch/um/kernel/trap_user.c 2004-07-16 19:36:37.162496904 +0300 -+++ uml-2.6.7/arch/um/kernel/trap_user.c 2004-07-16 19:47:24.799041184 +0300 -@@ -5,11 +5,9 @@ - - #include - #include --#include - #include - #include - #include --#include - #include - #include - #include -@@ -34,7 +32,14 @@ - { - kill(pid, SIGKILL); - kill(pid, SIGCONT); -- while(waitpid(pid, NULL, 0) > 0) kill(pid, SIGCONT); -+ do { -+ int n; -+ CATCH_EINTR(n = waitpid(pid, NULL, 0)); -+ if (n > 0) -+ kill(pid, SIGCONT); -+ else -+ break; -+ } while(1); - } - - /* Unlocked - don't care if this is a bit off */ -@@ -82,6 +87,8 @@ - .is_irq = 0 }, - [ SIGILL ] { .handler = relay_signal, - .is_irq = 0 }, -+ [ SIGWINCH ] { .handler = winch, -+ .is_irq = 1 }, - [ SIGBUS ] { .handler = bus_handler, - .is_irq = 0 }, - [ SIGSEGV] { .handler = segv_handler, -@@ -102,12 +109,11 @@ - sig, &sc); - } - --extern int timer_irq_inited, missed_ticks[]; -+extern int timer_irq_inited; - - void alarm_handler(int sig, struct sigcontext sc) - { - if(!timer_irq_inited) return; -- missed_ticks[cpu()]++; - - if(sig == SIGALRM) - switch_timers(0); -@@ -123,7 +129,7 @@ - { - jmp_buf *buf = b; - -- longjmp(*buf, val); -+ siglongjmp(*buf, val); - } - - /* -Index: uml-2.6.7/arch/um/Kconfig_net -=================================================================== ---- uml-2.6.7.orig/arch/um/Kconfig_net 2004-07-16 19:36:54.256898160 +0300 -+++ uml-2.6.7/arch/um/Kconfig_net 2004-07-16 19:47:23.712206408 +0300 -@@ -1,5 +1,5 @@ - --menu "Network Devices" -+menu "UML Network Devices" - depends on NET - - # UML virtual driver -@@ -176,73 +176,5 @@ - - Startup example: "eth0=slirp,FE:FD:01:02:03:04,/usr/local/bin/slirp" - -- --# Below are hardware-independent drivers mirrored from --# drivers/net/Config.in. 
It would be nice if Linux --# had HW independent drivers separated from the other --# but it does not. Until then each non-ISA/PCI arch --# needs to provide it's own menu of network drivers --config DUMMY -- tristate "Dummy net driver support" -- --config BONDING -- tristate "Bonding driver support" -- --config EQUALIZER -- tristate "EQL (serial line load balancing) support" -- --config TUN -- tristate "Universal TUN/TAP device driver support" -- --config ETHERTAP -- tristate "Ethertap network tap (OBSOLETE)" -- depends on EXPERIMENTAL && NETLINK -- --config PPP -- tristate "PPP (point-to-point protocol) support" -- --config PPP_MULTILINK -- bool "PPP multilink support (EXPERIMENTAL)" -- depends on PPP && EXPERIMENTAL -- --config PPP_FILTER -- bool "PPP filtering" -- depends on PPP && FILTER -- --config PPP_ASYNC -- tristate "PPP support for async serial ports" -- depends on PPP -- --config PPP_SYNC_TTY -- tristate "PPP support for sync tty ports" -- depends on PPP -- --config PPP_DEFLATE -- tristate "PPP Deflate compression" -- depends on PPP -- --config PPP_BSDCOMP -- tristate "PPP BSD-Compress compression" -- depends on PPP -- --config PPPOE -- tristate "PPP over Ethernet (EXPERIMENTAL)" -- depends on PPP && EXPERIMENTAL -- --config SLIP -- tristate "SLIP (serial line) support" -- --config SLIP_COMPRESSED -- bool "CSLIP compressed headers" -- depends on SLIP=y -- --config SLIP_SMART -- bool "Keepalive and linefill" -- depends on SLIP=y -- --config SLIP_MODE_SLIP6 -- bool "Six bit SLIP encapsulation" -- depends on SLIP=y -- - endmenu - -Index: uml-2.6.7/fs/hppfs/Makefile -=================================================================== ---- uml-2.6.7.orig/fs/hppfs/Makefile 2004-07-16 19:47:23.632218568 +0300 -+++ uml-2.6.7/fs/hppfs/Makefile 2004-07-16 19:47:23.786195160 +0300 -@@ -0,0 +1,19 @@ -+# -+# Copyright (C) 2002, 2003 Jeff Dike (jdike@karaya.com) -+# Licensed under the GPL -+# -+ -+hppfs-objs := hppfs_kern.o -+ -+obj-y = -+obj-$(CONFIG_HPPFS) += 
hppfs.o -+ -+clean: -+ -+modules: -+ -+fastdep: -+ -+dep: -+ -+archmrproper: clean -Index: uml-2.6.7/arch/um/kernel/trap_kern.c -=================================================================== ---- uml-2.6.7.orig/arch/um/kernel/trap_kern.c 2004-07-16 19:35:56.026750488 +0300 -+++ uml-2.6.7/arch/um/kernel/trap_kern.c 2004-07-16 19:47:23.742201848 +0300 -@@ -16,12 +16,15 @@ - #include "asm/tlbflush.h" - #include "asm/a.out.h" - #include "asm/current.h" -+#include "asm/irq.h" - #include "user_util.h" - #include "kern_util.h" - #include "kern.h" - #include "chan_kern.h" - #include "mconsole_kern.h" - #include "2_5compat.h" -+#include "mem.h" -+#include "mem_kern.h" - - int handle_page_fault(unsigned long address, unsigned long ip, - int is_write, int is_user, int *code_out) -@@ -51,12 +54,12 @@ - if(is_write && !(vma->vm_flags & VM_WRITE)) - goto out; - page = address & PAGE_MASK; -- if(page == (unsigned long) current->thread_info + PAGE_SIZE) -+ if(page == (unsigned long) current_thread + PAGE_SIZE) - panic("Kernel stack overflow"); - pgd = pgd_offset(mm, page); - pmd = pmd_offset(pgd, page); -- survive: - do { -+ survive: - switch (handle_mm_fault(mm, vma, address, is_write)){ - case VM_FAULT_MINOR: - current->min_flt++; -@@ -71,14 +74,20 @@ - err = -ENOMEM; - goto out_of_memory; - default: -- BUG(); -+ if (current->pid == 1) { -+ up_read(&mm->mmap_sem); -+ yield(); -+ down_read(&mm->mmap_sem); -+ goto survive; -+ } -+ goto out; - } - pte = pte_offset_kernel(pmd, page); - } while(!pte_present(*pte)); -+ err = 0; - *pte = pte_mkyoung(*pte); - if(pte_write(*pte)) *pte = pte_mkdirty(*pte); - flush_tlb_page(vma, page); -- err = 0; - out: - up_read(&mm->mmap_sem); - return(err); -@@ -98,6 +107,33 @@ - goto out; - } - -+LIST_HEAD(physmem_remappers); -+ -+void register_remapper(struct remapper *info) -+{ -+ list_add(&info->list, &physmem_remappers); -+} -+ -+static int check_remapped_addr(unsigned long address, int is_write) -+{ -+ struct remapper *remapper; -+ struct 
list_head *ele; -+ __u64 offset; -+ int fd; -+ -+ fd = phys_mapping(__pa(address), &offset); -+ if(fd == -1) -+ return(0); -+ -+ list_for_each(ele, &physmem_remappers){ -+ remapper = list_entry(ele, struct remapper, list); -+ if((*remapper->proc)(fd, address, is_write, offset)) -+ return(1); -+ } -+ -+ return(0); -+} -+ - unsigned long segv(unsigned long address, unsigned long ip, int is_write, - int is_user, void *sc) - { -@@ -109,7 +145,9 @@ - flush_tlb_kernel_vm(); - return(0); - } -- if(current->mm == NULL) -+ else if(check_remapped_addr(address & PAGE_MASK, is_write)) -+ return(0); -+ else if(current->mm == NULL) - panic("Segfault with no mm"); - err = handle_page_fault(address, ip, is_write, is_user, &si.si_code); - -@@ -120,9 +158,8 @@ - current->thread.fault_addr = (void *) address; - do_longjmp(catcher, 1); - } -- else if(current->thread.fault_addr != NULL){ -+ else if(current->thread.fault_addr != NULL) - panic("fault_addr set but no fault catcher"); -- } - else if(arch_fixup(ip, sc)) - return(0); - -@@ -155,8 +192,6 @@ - { - struct siginfo si; - -- printk(KERN_ERR "Unfixable SEGV in '%s' (pid %d) at 0x%lx " -- "(ip 0x%lx)\n", current->comm, current->pid, address, ip); - si.si_signo = SIGSEGV; - si.si_code = SEGV_ACCERR; - si.si_addr = (void *) address; -@@ -180,6 +215,11 @@ - else relay_signal(sig, regs); - } - -+void winch(int sig, union uml_pt_regs *regs) -+{ -+ do_IRQ(WINCH_IRQ, regs); -+} -+ - void trap_init(void) - { - } -Index: uml-2.6.7/arch/um/drivers/cow_sys.h -=================================================================== ---- uml-2.6.7.orig/arch/um/drivers/cow_sys.h 2004-07-16 19:47:23.608222216 +0300 -+++ uml-2.6.7/arch/um/drivers/cow_sys.h 2004-07-16 19:47:23.679211424 +0300 -@@ -0,0 +1,48 @@ -+#ifndef __COW_SYS_H__ -+#define __COW_SYS_H__ -+ -+#include "kern_util.h" -+#include "user_util.h" -+#include "os.h" -+#include "user.h" -+ -+static inline void *cow_malloc(int size) -+{ -+ return(um_kmalloc(size)); -+} -+ -+static inline void 
cow_free(void *ptr) -+{ -+ kfree(ptr); -+} -+ -+#define cow_printf printk -+ -+static inline char *cow_strdup(char *str) -+{ -+ return(uml_strdup(str)); -+} -+ -+static inline int cow_seek_file(int fd, __u64 offset) -+{ -+ return(os_seek_file(fd, offset)); -+} -+ -+static inline int cow_file_size(char *file, __u64 *size_out) -+{ -+ return(os_file_size(file, size_out)); -+} -+ -+static inline int cow_write_file(int fd, char *buf, int size) -+{ -+ return(os_write_file(fd, buf, size)); -+} -+ -+#endif -+ -+/* -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -Index: uml-2.6.7/arch/um/drivers/harddog_user.c -=================================================================== ---- uml-2.6.7.orig/arch/um/drivers/harddog_user.c 2004-07-16 19:36:22.284758664 +0300 -+++ uml-2.6.7/arch/um/drivers/harddog_user.c 2004-07-16 19:47:23.682210968 +0300 -@@ -27,10 +27,10 @@ - dup2(data->stdin, 0); - dup2(data->stdout, 1); - dup2(data->stdout, 2); -- close(data->stdin); -- close(data->stdout); -- close(data->close_me[0]); -- close(data->close_me[1]); -+ os_close_file(data->stdin); -+ os_close_file(data->stdout); -+ os_close_file(data->close_me[0]); -+ os_close_file(data->close_me[1]); - } - - int start_watchdog(int *in_fd_ret, int *out_fd_ret, char *sock) -@@ -44,15 +44,15 @@ - char **args = NULL; - - err = os_pipe(in_fds, 1, 0); -- if(err){ -- printk("harddog_open - os_pipe failed, errno = %d\n", -err); -- return(err); -+ if(err < 0){ -+ printk("harddog_open - os_pipe failed, err = %d\n", -err); -+ goto out; - } - - err = os_pipe(out_fds, 1, 0); -- if(err){ -- printk("harddog_open - os_pipe failed, errno = %d\n", -err); -- return(err); -+ if(err < 0){ -+ printk("harddog_open - os_pipe failed, err = %d\n", -err); -+ goto out_close_in; - } - - data.stdin = out_fds[0]; -@@ -72,42 +72,47 @@ - - pid = run_helper(pre_exec, &data, args, NULL); - -- close(out_fds[0]); -- close(in_fds[1]); -+ 
os_close_file(out_fds[0]); -+ os_close_file(in_fds[1]); - - if(pid < 0){ - err = -pid; -- printk("harddog_open - run_helper failed, errno = %d\n", err); -- goto out; -+ printk("harddog_open - run_helper failed, errno = %d\n", -err); -+ goto out_close_out; - } - -- n = read(in_fds[0], &c, sizeof(c)); -+ n = os_read_file(in_fds[0], &c, sizeof(c)); - if(n == 0){ - printk("harddog_open - EOF on watchdog pipe\n"); - helper_wait(pid); - err = -EIO; -- goto out; -+ goto out_close_out; - } - else if(n < 0){ - printk("harddog_open - read of watchdog pipe failed, " -- "errno = %d\n", errno); -+ "err = %d\n", -n); - helper_wait(pid); -- err = -errno; -- goto out; -+ err = n; -+ goto out_close_out; - } - *in_fd_ret = in_fds[0]; - *out_fd_ret = out_fds[1]; - return(0); -+ -+ out_close_in: -+ os_close_file(in_fds[0]); -+ os_close_file(in_fds[1]); -+ out_close_out: -+ os_close_file(out_fds[0]); -+ os_close_file(out_fds[1]); - out: -- close(out_fds[1]); -- close(in_fds[0]); - return(err); - } - - void stop_watchdog(int in_fd, int out_fd) - { -- close(in_fd); -- close(out_fd); -+ os_close_file(in_fd); -+ os_close_file(out_fd); - } - - int ping_watchdog(int fd) -@@ -115,11 +120,12 @@ - int n; - char c = '\n'; - -- n = write(fd, &c, sizeof(c)); -- if(n < sizeof(c)){ -- printk("ping_watchdog - write failed, errno = %d\n", -- errno); -- return(-errno); -+ n = os_write_file(fd, &c, sizeof(c)); -+ if(n != sizeof(c)){ -+ printk("ping_watchdog - write failed, err = %d\n", -n); -+ if(n < 0) -+ return(n); -+ return(-EIO); - } - return 1; - -Index: uml-2.6.7/fs/hostfs/hostfs_user.c -=================================================================== ---- uml-2.6.7.orig/fs/hostfs/hostfs_user.c 2004-07-16 19:47:23.631218720 +0300 -+++ uml-2.6.7/fs/hostfs/hostfs_user.c 2004-07-16 19:47:23.783195616 +0300 -@@ -0,0 +1,361 @@ -+/* -+ * Copyright (C) 2000 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#include -+#include -+#include -+#include -+#include -+#include -+#include 
-+#include -+#include -+#include -+#include "hostfs.h" -+#include "kern_util.h" -+#include "user.h" -+ -+int stat_file(const char *path, unsigned long long *inode_out, int *mode_out, -+ int *nlink_out, int *uid_out, int *gid_out, -+ unsigned long long *size_out, struct timespec *atime_out, -+ struct timespec *mtime_out, struct timespec *ctime_out, -+ int *blksize_out, unsigned long long *blocks_out) -+{ -+ struct stat64 buf; -+ -+ if(lstat64(path, &buf) < 0) -+ return(-errno); -+ -+ /* See the Makefile for why STAT64_INO_FIELD is passed in -+ * by the build -+ */ -+ if(inode_out != NULL) *inode_out = buf.STAT64_INO_FIELD; -+ if(mode_out != NULL) *mode_out = buf.st_mode; -+ if(nlink_out != NULL) *nlink_out = buf.st_nlink; -+ if(uid_out != NULL) *uid_out = buf.st_uid; -+ if(gid_out != NULL) *gid_out = buf.st_gid; -+ if(size_out != NULL) *size_out = buf.st_size; -+ if(atime_out != NULL) { -+ atime_out->tv_sec = buf.st_atime; -+ atime_out->tv_nsec = 0; -+ } -+ if(mtime_out != NULL) { -+ mtime_out->tv_sec = buf.st_mtime; -+ mtime_out->tv_nsec = 0; -+ } -+ if(ctime_out != NULL) { -+ ctime_out->tv_sec = buf.st_ctime; -+ ctime_out->tv_nsec = 0; -+ } -+ if(blksize_out != NULL) *blksize_out = buf.st_blksize; -+ if(blocks_out != NULL) *blocks_out = buf.st_blocks; -+ return(0); -+} -+ -+int file_type(const char *path, int *rdev) -+{ -+ struct stat64 buf; -+ -+ if(lstat64(path, &buf) < 0) -+ return(-errno); -+ if(rdev != NULL) -+ *rdev = buf.st_rdev; -+ -+ if(S_ISDIR(buf.st_mode)) return(OS_TYPE_DIR); -+ else if(S_ISLNK(buf.st_mode)) return(OS_TYPE_SYMLINK); -+ else if(S_ISCHR(buf.st_mode)) return(OS_TYPE_CHARDEV); -+ else if(S_ISBLK(buf.st_mode)) return(OS_TYPE_BLOCKDEV); -+ else if(S_ISFIFO(buf.st_mode))return(OS_TYPE_FIFO); -+ else if(S_ISSOCK(buf.st_mode))return(OS_TYPE_SOCK); -+ else return(OS_TYPE_FILE); -+} -+ -+int access_file(char *path, int r, int w, int x) -+{ -+ int mode = 0; -+ -+ if(r) mode = R_OK; -+ if(w) mode |= W_OK; -+ if(x) mode |= X_OK; -+ if(access(path, 
mode) != 0) return(-errno); -+ else return(0); -+} -+ -+int open_file(char *path, int r, int w, int append) -+{ -+ int mode = 0, fd; -+ -+ if(r && !w) -+ mode = O_RDONLY; -+ else if(!r && w) -+ mode = O_WRONLY; -+ else if(r && w) -+ mode = O_RDWR; -+ else panic("Impossible mode in open_file"); -+ -+ if(append) -+ mode |= O_APPEND; -+ fd = open64(path, mode); -+ if(fd < 0) return(-errno); -+ else return(fd); -+} -+ -+void *open_dir(char *path, int *err_out) -+{ -+ DIR *dir; -+ -+ dir = opendir(path); -+ *err_out = errno; -+ if(dir == NULL) return(NULL); -+ return(dir); -+} -+ -+char *read_dir(void *stream, unsigned long long *pos, -+ unsigned long long *ino_out, int *len_out) -+{ -+ DIR *dir = stream; -+ struct dirent *ent; -+ -+ seekdir(dir, *pos); -+ ent = readdir(dir); -+ if(ent == NULL) return(NULL); -+ *len_out = strlen(ent->d_name); -+ *ino_out = ent->d_ino; -+ *pos = telldir(dir); -+ return(ent->d_name); -+} -+ -+int read_file(int fd, unsigned long long *offset, char *buf, int len) -+{ -+ int n; -+ -+ n = pread64(fd, buf, len, *offset); -+ if(n < 0) return(-errno); -+ *offset += n; -+ return(n); -+} -+ -+int write_file(int fd, unsigned long long *offset, const char *buf, int len) -+{ -+ int n; -+ -+ n = pwrite64(fd, buf, len, *offset); -+ if(n < 0) return(-errno); -+ *offset += n; -+ return(n); -+} -+ -+int lseek_file(int fd, long long offset, int whence) -+{ -+ int ret; -+ -+ ret = lseek64(fd, offset, whence); -+ if(ret < 0) return(-errno); -+ return(0); -+} -+ -+void close_file(void *stream) -+{ -+ close(*((int *) stream)); -+} -+ -+void close_dir(void *stream) -+{ -+ closedir(stream); -+} -+ -+int file_create(char *name, int ur, int uw, int ux, int gr, -+ int gw, int gx, int or, int ow, int ox) -+{ -+ int mode, fd; -+ -+ mode = 0; -+ mode |= ur ? S_IRUSR : 0; -+ mode |= uw ? S_IWUSR : 0; -+ mode |= ux ? S_IXUSR : 0; -+ mode |= gr ? S_IRGRP : 0; -+ mode |= gw ? S_IWGRP : 0; -+ mode |= gx ? S_IXGRP : 0; -+ mode |= or ? S_IROTH : 0; -+ mode |= ow ? 
S_IWOTH : 0; -+ mode |= ox ? S_IXOTH : 0; -+ fd = open64(name, O_CREAT | O_RDWR, mode); -+ if(fd < 0) -+ return(-errno); -+ return(fd); -+} -+ -+int set_attr(const char *file, struct hostfs_iattr *attrs) -+{ -+ struct utimbuf buf; -+ int err, ma; -+ -+ if(attrs->ia_valid & HOSTFS_ATTR_MODE){ -+ if(chmod(file, attrs->ia_mode) != 0) return(-errno); -+ } -+ if(attrs->ia_valid & HOSTFS_ATTR_UID){ -+ if(chown(file, attrs->ia_uid, -1)) return(-errno); -+ } -+ if(attrs->ia_valid & HOSTFS_ATTR_GID){ -+ if(chown(file, -1, attrs->ia_gid)) return(-errno); -+ } -+ if(attrs->ia_valid & HOSTFS_ATTR_SIZE){ -+ if(truncate(file, attrs->ia_size)) return(-errno); -+ } -+ ma = HOSTFS_ATTR_ATIME_SET | HOSTFS_ATTR_MTIME_SET; -+ if((attrs->ia_valid & ma) == ma){ -+ buf.actime = attrs->ia_atime.tv_sec; -+ buf.modtime = attrs->ia_mtime.tv_sec; -+ if(utime(file, &buf) != 0) return(-errno); -+ } -+ else { -+ struct timespec ts; -+ -+ if(attrs->ia_valid & HOSTFS_ATTR_ATIME_SET){ -+ err = stat_file(file, NULL, NULL, NULL, NULL, NULL, -+ NULL, NULL, &ts, NULL, NULL, NULL); -+ if(err != 0) -+ return(err); -+ buf.actime = attrs->ia_atime.tv_sec; -+ buf.modtime = ts.tv_sec; -+ if(utime(file, &buf) != 0) -+ return(-errno); -+ } -+ if(attrs->ia_valid & HOSTFS_ATTR_MTIME_SET){ -+ err = stat_file(file, NULL, NULL, NULL, NULL, NULL, -+ NULL, &ts, NULL, NULL, NULL, NULL); -+ if(err != 0) -+ return(err); -+ buf.actime = ts.tv_sec; -+ buf.modtime = attrs->ia_mtime.tv_sec; -+ if(utime(file, &buf) != 0) -+ return(-errno); -+ } -+ } -+ if(attrs->ia_valid & HOSTFS_ATTR_CTIME) ; -+ if(attrs->ia_valid & (HOSTFS_ATTR_ATIME | HOSTFS_ATTR_MTIME)){ -+ err = stat_file(file, NULL, NULL, NULL, NULL, NULL, NULL, -+ &attrs->ia_atime, &attrs->ia_mtime, NULL, -+ NULL, NULL); -+ if(err != 0) return(err); -+ } -+ return(0); -+} -+ -+int make_symlink(const char *from, const char *to) -+{ -+ int err; -+ -+ err = symlink(to, from); -+ if(err) return(-errno); -+ return(0); -+} -+ -+int unlink_file(const char *file) -+{ -+ int 
err; -+ -+ err = unlink(file); -+ if(err) return(-errno); -+ return(0); -+} -+ -+int do_mkdir(const char *file, int mode) -+{ -+ int err; -+ -+ err = mkdir(file, mode); -+ if(err) return(-errno); -+ return(0); -+} -+ -+int do_rmdir(const char *file) -+{ -+ int err; -+ -+ err = rmdir(file); -+ if(err) return(-errno); -+ return(0); -+} -+ -+int do_mknod(const char *file, int mode, int dev) -+{ -+ int err; -+ -+ err = mknod(file, mode, dev); -+ if(err) return(-errno); -+ return(0); -+} -+ -+int link_file(const char *to, const char *from) -+{ -+ int err; -+ -+ err = link(to, from); -+ if(err) return(-errno); -+ return(0); -+} -+ -+int do_readlink(char *file, char *buf, int size) -+{ -+ int n; -+ -+ n = readlink(file, buf, size); -+ if(n < 0) -+ return(-errno); -+ if(n < size) -+ buf[n] = '\0'; -+ return(n); -+} -+ -+int rename_file(char *from, char *to) -+{ -+ int err; -+ -+ err = rename(from, to); -+ if(err < 0) return(-errno); -+ return(0); -+} -+ -+int do_statfs(char *root, long *bsize_out, long long *blocks_out, -+ long long *bfree_out, long long *bavail_out, -+ long long *files_out, long long *ffree_out, -+ void *fsid_out, int fsid_size, long *namelen_out, -+ long *spare_out) -+{ -+ struct statfs64 buf; -+ int err; -+ -+ err = statfs64(root, &buf); -+ if(err < 0) return(-errno); -+ *bsize_out = buf.f_bsize; -+ *blocks_out = buf.f_blocks; -+ *bfree_out = buf.f_bfree; -+ *bavail_out = buf.f_bavail; -+ *files_out = buf.f_files; -+ *ffree_out = buf.f_ffree; -+ memcpy(fsid_out, &buf.f_fsid, -+ sizeof(buf.f_fsid) > fsid_size ? fsid_size : -+ sizeof(buf.f_fsid)); -+ *namelen_out = buf.f_namelen; -+ spare_out[0] = buf.f_spare[0]; -+ spare_out[1] = buf.f_spare[1]; -+ spare_out[2] = buf.f_spare[2]; -+ spare_out[3] = buf.f_spare[3]; -+ spare_out[4] = buf.f_spare[4]; -+ spare_out[5] = buf.f_spare[5]; -+ return(0); -+} -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. 
-+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -Index: uml-2.6.7/arch/um/kernel/skas/mem_user.c -=================================================================== ---- uml-2.6.7.orig/arch/um/kernel/skas/mem_user.c 2004-07-16 19:36:25.158321816 +0300 -+++ uml-2.6.7/arch/um/kernel/skas/mem_user.c 2004-07-16 19:47:23.729203824 +0300 -@@ -7,6 +7,7 @@ - #include - #include - #include "mem_user.h" -+#include "mem.h" - #include "user.h" - #include "os.h" - #include "proc_mm.h" -@@ -15,12 +16,12 @@ - int r, int w, int x) - { - struct proc_mm_op map; -- struct mem_region *region; -- int prot, n; -+ __u64 offset; -+ int prot, n, phys_fd; - - prot = (r ? PROT_READ : 0) | (w ? PROT_WRITE : 0) | - (x ? PROT_EXEC : 0); -- region = phys_region(phys); -+ phys_fd = phys_mapping(phys, &offset); - - map = ((struct proc_mm_op) { .op = MM_MMAP, - .u = -@@ -30,12 +31,12 @@ - .prot = prot, - .flags = MAP_SHARED | - MAP_FIXED, -- .fd = region->fd, -- .offset = phys_offset(phys) -+ .fd = phys_fd, -+ .offset = offset - } } } ); - n = os_write_file(fd, &map, sizeof(map)); - if(n != sizeof(map)) -- printk("map : /proc/mm map failed, errno = %d\n", errno); -+ printk("map : /proc/mm map failed, err = %d\n", -n); - } - - int unmap(int fd, void *addr, int len) -@@ -49,8 +50,13 @@ - { .addr = (unsigned long) addr, - .len = len } } } ); - n = os_write_file(fd, &unmap, sizeof(unmap)); -- if((n != 0) && (n != sizeof(unmap))) -- return(-errno); -+ if(n != sizeof(unmap)) { -+ if(n < 0) -+ return(n); -+ else if(n > 0) -+ return(-EIO); -+ } -+ - return(0); - } - -@@ -71,11 +77,15 @@ - .prot = prot } } } ); - - n = os_write_file(fd, &protect, sizeof(protect)); -- if((n != 0) && (n != sizeof(protect))){ -+ if(n != sizeof(protect)) { -+ if(n 
== 0) return(0); -+ - if(must_succeed) -- panic("protect failed, errno = %d", errno); -- return(-errno); -+ panic("protect failed, err = %d", -n); -+ -+ return(-EIO); - } -+ - return(0); - } - -Index: uml-2.6.7/arch/um/kernel/skas/include/ptrace-skas.h -=================================================================== ---- uml-2.6.7.orig/arch/um/kernel/skas/include/ptrace-skas.h 2004-07-16 19:36:04.873405592 +0300 -+++ uml-2.6.7/arch/um/kernel/skas/include/ptrace-skas.h 2004-07-16 19:47:24.724052584 +0300 -@@ -10,6 +10,15 @@ - - #ifdef UML_CONFIG_MODE_SKAS - -+/* syscall emulation path in ptrace */ -+ -+#ifndef PTRACE_SYSEMU -+#define PTRACE_SYSEMU 31 -+#endif -+ -+void set_using_sysemu(int value); -+int get_using_sysemu(void); -+ - #include "skas_ptregs.h" - - #define HOST_FRAME_SIZE 17 -Index: uml-2.6.7/include/asm-um/archparam-i386.h -=================================================================== ---- uml-2.6.7.orig/include/asm-um/archparam-i386.h 2004-07-16 19:37:19.977987960 +0300 -+++ uml-2.6.7/include/asm-um/archparam-i386.h 2004-07-16 19:47:23.787195008 +0300 -@@ -1,5 +1,5 @@ - /* -- * Copyright (C) 2000, 2001 Jeff Dike (jdike@karaya.com) -+ * Copyright (C) 2000 - 2003 Jeff Dike (jdike@addtoit.com) - * Licensed under the GPL - */ - -@@ -56,6 +56,93 @@ - pr_reg[16] = PT_REGS_SS(regs); \ - } while(0); - -+#if 0 /* Turn this back on when UML has VSYSCALL working */ -+#define VSYSCALL_BASE (__fix_to_virt(FIX_VSYSCALL)) -+#else -+#define VSYSCALL_BASE 0 -+#endif -+ -+#define VSYSCALL_EHDR ((const struct elfhdr *) VSYSCALL_BASE) -+#define VSYSCALL_ENTRY ((unsigned long) &__kernel_vsyscall) -+extern void *__kernel_vsyscall; -+ -+/* -+ * Architecture-neutral AT_ values in 0-17, leave some room -+ * for more of them, start the x86-specific ones at 32. 
-+ */ -+#define AT_SYSINFO 32 -+#define AT_SYSINFO_EHDR 33 -+ -+#define ARCH_DLINFO \ -+do { \ -+ NEW_AUX_ENT(AT_SYSINFO, VSYSCALL_ENTRY); \ -+ NEW_AUX_ENT(AT_SYSINFO_EHDR, VSYSCALL_BASE); \ -+} while (0) -+ -+/* -+ * These macros parameterize elf_core_dump in fs/binfmt_elf.c to write out -+ * extra segments containing the vsyscall DSO contents. Dumping its -+ * contents makes post-mortem fully interpretable later without matching up -+ * the same kernel and hardware config to see what PC values meant. -+ * Dumping its extra ELF program headers includes all the other information -+ * a debugger needs to easily find how the vsyscall DSO was being used. -+ */ -+#if 0 -+#define ELF_CORE_EXTRA_PHDRS (VSYSCALL_EHDR->e_phnum) -+#endif -+ -+#undef ELF_CORE_EXTRA_PHDRS -+ -+#if 0 -+#define ELF_CORE_WRITE_EXTRA_PHDRS \ -+do { \ -+ const struct elf_phdr *const vsyscall_phdrs = \ -+ (const struct elf_phdr *) (VSYSCALL_BASE \ -+ + VSYSCALL_EHDR->e_phoff); \ -+ int i; \ -+ Elf32_Off ofs = 0; \ -+ for (i = 0; i < VSYSCALL_EHDR->e_phnum; ++i) { \ -+ struct elf_phdr phdr = vsyscall_phdrs[i]; \ -+ if (phdr.p_type == PT_LOAD) { \ -+ ofs = phdr.p_offset = offset; \ -+ offset += phdr.p_filesz; \ -+ } \ -+ else \ -+ phdr.p_offset += ofs; \ -+ phdr.p_paddr = 0; /* match other core phdrs */ \ -+ DUMP_WRITE(&phdr, sizeof(phdr)); \ -+ } \ -+} while (0) -+#define ELF_CORE_WRITE_EXTRA_DATA \ -+do { \ -+ const struct elf_phdr *const vsyscall_phdrs = \ -+ (const struct elf_phdr *) (VSYSCALL_BASE \ -+ + VSYSCALL_EHDR->e_phoff); \ -+ int i; \ -+ for (i = 0; i < VSYSCALL_EHDR->e_phnum; ++i) { \ -+ if (vsyscall_phdrs[i].p_type == PT_LOAD) \ -+ DUMP_WRITE((void *) vsyscall_phdrs[i].p_vaddr, \ -+ vsyscall_phdrs[i].p_filesz); \ -+ } \ -+} while (0) -+#endif -+ -+#undef ELF_CORE_WRITE_EXTRA_PHDRS -+#undef ELF_CORE_WRITE_EXTRA_DATA -+ -+#define R_386_NONE 0 -+#define R_386_32 1 -+#define R_386_PC32 2 -+#define R_386_GOT32 3 -+#define R_386_PLT32 4 -+#define R_386_COPY 5 -+#define R_386_GLOB_DAT 6 
-+#define R_386_JMP_SLOT 7 -+#define R_386_RELATIVE 8 -+#define R_386_GOTOFF 9 -+#define R_386_GOTPC 10 -+#define R_386_NUM 11 -+ - /********* Bits for asm-um/delay.h **********/ - - typedef unsigned long um_udelay_t; -Index: uml-2.6.7/arch/um/kernel/skas/process_kern.c -=================================================================== ---- uml-2.6.7.orig/arch/um/kernel/skas/process_kern.c 2004-07-16 19:37:52.070109216 +0300 -+++ uml-2.6.7/arch/um/kernel/skas/process_kern.c 2004-07-16 19:47:24.725052432 +0300 -@@ -6,6 +6,12 @@ - #include "linux/sched.h" - #include "linux/slab.h" - #include "linux/ptrace.h" -+#include "linux/proc_fs.h" -+#include "linux/file.h" -+#include "linux/errno.h" -+#include "linux/init.h" -+#include "asm/uaccess.h" -+#include "asm/atomic.h" - #include "kern_util.h" - #include "time_user.h" - #include "signal_user.h" -@@ -17,6 +23,59 @@ - #include "kern.h" - #include "mode.h" - -+#ifdef PTRACE_SYSEMU -+static atomic_t using_sysemu; -+#endif -+ -+void set_using_sysemu(int value) -+{ -+ atomic_set(&using_sysemu, value); -+} -+ -+int get_using_sysemu(void) -+{ -+ return atomic_read(&using_sysemu); -+} -+ -+int proc_read_sysemu(char *buf, char **start, off_t offset, int size,int *eof, void *data) -+{ -+ if (snprintf(buf, size, "%d\n", get_using_sysemu()) < size) /*No overflow*/ -+ *eof = 1; -+ -+ return strlen(buf); -+} -+ -+int proc_write_sysemu(struct file *file,const char *buf, unsigned long count,void *data) -+{ -+ char tmp[2]; -+ -+ if (copy_from_user(tmp, buf, 1)) -+ return -EFAULT; -+ -+ if (tmp[0] == '0' || tmp[0] == '1') -+ set_using_sysemu(tmp[0] - '0'); -+ return count; /*We use the first char, but pretend to write everything*/ -+} -+ -+int __init make_proc_sysemu(void) -+{ -+ struct proc_dir_entry *ent; -+ -+ ent = create_proc_entry("sysemu", 00600, &proc_root); -+ ent->read_proc = proc_read_sysemu; -+ ent->write_proc = proc_write_sysemu; -+ -+ if (ent == NULL) -+ { -+ printk("Failed to register /proc/sysemu\n"); -+ return(0); -+ } 
-+ -+ return 0; -+} -+ -+late_initcall(make_proc_sysemu); -+ - int singlestepping_skas(void) - { - int ret = current->ptrace & PT_DTRACE; -@@ -61,11 +120,13 @@ - thread_wait(¤t->thread.mode.skas.switch_buf, - current->thread.mode.skas.fork_buf); - --#ifdef CONFIG_SMP -- schedule_tail(NULL); --#endif -+ if(current->thread.prev_sched != NULL) -+ schedule_tail(current->thread.prev_sched); - current->thread.prev_sched = NULL; - -+ /* The return value is 1 if the kernel thread execs a process, -+ * 0 if it just exits -+ */ - n = run_kernel_thread(fn, arg, ¤t->thread.exec_buf); - if(n == 1) - userspace(¤t->thread.regs.regs); -@@ -93,11 +154,11 @@ - current->thread.mode.skas.fork_buf); - - force_flush_all(); --#ifdef CONFIG_SMP -+ if(current->thread.prev_sched == NULL) -+ panic("blech"); -+ - schedule_tail(current->thread.prev_sched); --#endif - current->thread.prev_sched = NULL; -- unblock_signals(); - - userspace(¤t->thread.regs.regs); - } -@@ -136,7 +197,7 @@ - - void init_idle_skas(void) - { -- cpu_tasks[current->thread_info->cpu].pid = os_getpid(); -+ cpu_tasks[current_thread->cpu].pid = os_getpid(); - default_idle(); - } - -@@ -160,11 +221,11 @@ - - int start_uml_skas(void) - { -- start_userspace(); -+ start_userspace(0); - capture_signal_stack(); -+ uml_idle_timer(); - - init_new_thread_signals(1); -- idle_timer(); - - init_task.thread.request.u.thread.proc = start_kernel_proc; - init_task.thread.request.u.thread.arg = NULL; -@@ -175,12 +236,14 @@ - - int external_pid_skas(struct task_struct *task) - { -- return(userspace_pid); -+#warning Need to look up userspace_pid by cpu -+ return(userspace_pid[0]); - } - - int thread_pid_skas(struct task_struct *task) - { -- return(userspace_pid); -+#warning Need to look up userspace_pid by cpu -+ return(userspace_pid[0]); - } - - /* -Index: uml-2.6.7/arch/um/kernel/helper.c -=================================================================== ---- uml-2.6.7.orig/arch/um/kernel/helper.c 2004-07-16 19:36:40.052057624 +0300 -+++ 
uml-2.6.7/arch/um/kernel/helper.c 2004-07-16 19:47:24.798041336 +0300 -@@ -7,12 +7,12 @@ - #include - #include - #include --#include - #include - #include - #include - #include "user.h" - #include "kern_util.h" -+#include "user_util.h" - #include "os.h" - - struct helper_data { -@@ -33,6 +33,7 @@ - { - struct helper_data *data = arg; - char **argv = data->argv; -+ int errval; - - if(helper_pause){ - signal(SIGHUP, helper_hup); -@@ -41,8 +42,9 @@ - if(data->pre_exec != NULL) - (*data->pre_exec)(data->pre_data); - execvp(argv[0], argv); -+ errval = errno; - printk("execvp of '%s' failed - errno = %d\n", argv[0], errno); -- write(data->fd, &errno, sizeof(errno)); -+ os_write_file(data->fd, &errval, sizeof(errval)); - os_kill_process(os_getpid(), 0); - return(0); - } -@@ -59,17 +61,20 @@ - if((stack_out != NULL) && (*stack_out != 0)) - stack = *stack_out; - else stack = alloc_stack(0, um_in_interrupt()); -- if(stack == 0) return(-ENOMEM); -+ if(stack == 0) -+ return(-ENOMEM); - - err = os_pipe(fds, 1, 0); -- if(err){ -- printk("run_helper : pipe failed, errno = %d\n", -err); -- return(err); -+ if(err < 0){ -+ printk("run_helper : pipe failed, err = %d\n", -err); -+ goto out_free; - } -- if(fcntl(fds[1], F_SETFD, 1) != 0){ -- printk("run_helper : setting FD_CLOEXEC failed, errno = %d\n", -- errno); -- return(-errno); -+ -+ err = os_set_exec_close(fds[1], 1); -+ if(err < 0){ -+ printk("run_helper : setting FD_CLOEXEC failed, err = %d\n", -+ -err); -+ goto out_close; - } - - sp = stack + page_size() - sizeof(void *); -@@ -80,23 +85,34 @@ - pid = clone(helper_child, (void *) sp, CLONE_VM | SIGCHLD, &data); - if(pid < 0){ - printk("run_helper : clone failed, errno = %d\n", errno); -- return(-errno); -+ err = -errno; -+ goto out_close; - } -- close(fds[1]); -- n = read(fds[0], &err, sizeof(err)); -+ -+ os_close_file(fds[1]); -+ n = os_read_file(fds[0], &err, sizeof(err)); - if(n < 0){ -- printk("run_helper : read on pipe failed, errno = %d\n", -- errno); -- return(-errno); 
-+ printk("run_helper : read on pipe failed, err = %d\n", -n); -+ err = n; -+ goto out_kill; - } - else if(n != 0){ -- waitpid(pid, NULL, 0); -- pid = -err; -+ CATCH_EINTR(n = waitpid(pid, NULL, 0)); -+ pid = -errno; - } - - if(stack_out == NULL) free_stack(stack, 0); - else *stack_out = stack; - return(pid); -+ -+ out_kill: -+ os_kill_process(pid, 1); -+ out_close: -+ os_close_file(fds[0]); -+ os_close_file(fds[1]); -+ out_free: -+ free_stack(stack, 0); -+ return(err); - } - - int run_helper_thread(int (*proc)(void *), void *arg, unsigned int flags, -@@ -117,9 +133,11 @@ - } - if(stack_out == NULL){ - pid = waitpid(pid, &status, 0); -- if(pid < 0) -+ if(pid < 0){ - printk("run_helper_thread - wait failed, errno = %d\n", -- pid); -+ errno); -+ pid = -errno; -+ } - if(!WIFEXITED(status) || (WEXITSTATUS(status) != 0)) - printk("run_helper_thread - thread returned status " - "0x%x\n", status); -Index: uml-2.6.7/arch/um/drivers/fd.c -=================================================================== ---- uml-2.6.7.orig/arch/um/drivers/fd.c 2004-07-16 19:37:38.077236456 +0300 -+++ uml-2.6.7/arch/um/drivers/fd.c 2004-07-16 19:47:23.681211120 +0300 -@@ -35,7 +35,8 @@ - printk("fd_init : couldn't parse file descriptor '%s'\n", str); - return(NULL); - } -- if((data = um_kmalloc(sizeof(*data))) == NULL) return(NULL); -+ data = um_kmalloc(sizeof(*data)); -+ if(data == NULL) return(NULL); - *data = ((struct fd_chan) { .fd = n, - .raw = opts->raw }); - return(data); -Index: uml-2.6.7/arch/um/include/kern_util.h -=================================================================== ---- uml-2.6.7.orig/arch/um/include/kern_util.h 2004-07-16 19:36:25.260306312 +0300 -+++ uml-2.6.7/arch/um/include/kern_util.h 2004-07-16 19:47:23.702207928 +0300 -@@ -60,12 +60,11 @@ - extern void paging_init(void); - extern void init_flush_vm(void); - extern void *syscall_sp(void *t); --extern void syscall_trace(void); -+extern void syscall_trace(union uml_pt_regs *regs, int entryexit); - extern int 
hz(void); --extern void idle_timer(void); -+extern void uml_idle_timer(void); - extern unsigned int do_IRQ(int irq, union uml_pt_regs *regs); - extern int external_pid(void *t); --extern int pid_to_processor_id(int pid); - extern void boot_timer_handler(int sig); - extern void interrupt_end(void); - extern void initial_thread_cb(void (*proc)(void *), void *arg); -@@ -89,9 +88,7 @@ - extern char *uml_strdup(char *string); - extern void unprotect_kernel_mem(void); - extern void protect_kernel_mem(void); --extern void set_kmem_end(unsigned long); - extern void uml_cleanup(void); --extern int pid_to_processor_id(int pid); - extern void set_current(void *t); - extern void lock_signalled_task(void *t); - extern void IPI_handler(int cpu); -@@ -100,7 +97,9 @@ - extern int clear_user_proc(void *buf, int size); - extern int copy_to_user_proc(void *to, void *from, int size); - extern int copy_from_user_proc(void *to, void *from, int size); -+extern int strlen_user_proc(char *str); - extern void bus_handler(int sig, union uml_pt_regs *regs); -+extern void winch(int sig, union uml_pt_regs *regs); - extern long execute_syscall(void *r); - extern int smp_sigio_handler(void); - extern void *get_current(void); -@@ -111,6 +110,8 @@ - extern void free_irq(unsigned int, void *); - extern int um_in_interrupt(void); - extern int cpu(void); -+extern unsigned long long time_stamp(void); -+ - #endif - - /* -Index: uml-2.6.7/include/asm-um/sections.h -=================================================================== ---- uml-2.6.7.orig/include/asm-um/sections.h 2004-07-16 19:47:23.634218264 +0300 -+++ uml-2.6.7/include/asm-um/sections.h 2004-07-16 19:47:23.795193792 +0300 -@@ -0,0 +1,7 @@ -+#ifndef _UM_SECTIONS_H -+#define _UM_SECTIONS_H -+ -+/* nothing to see, move along */ -+#include -+ -+#endif -Index: uml-2.6.7/include/asm-um/pgtable.h -=================================================================== ---- uml-2.6.7.orig/include/asm-um/pgtable.h 2004-07-16 19:37:51.945128216 +0300 
-+++ uml-2.6.7/include/asm-um/pgtable.h 2004-07-16 19:47:23.793194096 +0300 -@@ -12,8 +12,6 @@ - #include "asm/page.h" - #include "asm/fixmap.h" - --extern pgd_t swapper_pg_dir[1024]; -- - extern void *um_virt_to_phys(struct task_struct *task, unsigned long virt, - pte_t *pte_out); - -@@ -49,6 +47,8 @@ - #define pgd_ERROR(e) \ - printk("%s:%d: bad pgd %08lx.\n", __FILE__, __LINE__, pgd_val(e)) - -+extern pgd_t swapper_pg_dir[PTRS_PER_PGD]; -+ - /* - * pgd entries used up by user/kernel: - */ -@@ -65,10 +65,10 @@ - * area for the same reason. ;) - */ - --extern unsigned long high_physmem; -+extern unsigned long end_iomem; - - #define VMALLOC_OFFSET (__va_space) --#define VMALLOC_START (((unsigned long) high_physmem + VMALLOC_OFFSET) & ~(VMALLOC_OFFSET-1)) -+#define VMALLOC_START ((end_iomem + VMALLOC_OFFSET) & ~(VMALLOC_OFFSET-1)) - - #ifdef CONFIG_HIGHMEM - # define VMALLOC_END (PKMAP_BASE-2*PAGE_SIZE) -@@ -78,12 +78,13 @@ - - #define _PAGE_PRESENT 0x001 - #define _PAGE_NEWPAGE 0x002 --#define _PAGE_PROTNONE 0x004 /* If not present */ --#define _PAGE_RW 0x008 --#define _PAGE_USER 0x010 --#define _PAGE_ACCESSED 0x020 --#define _PAGE_DIRTY 0x040 --#define _PAGE_NEWPROT 0x080 -+#define _PAGE_NEWPROT 0x004 -+#define _PAGE_FILE 0x008 /* set:pagecache unset:swap */ -+#define _PAGE_PROTNONE 0x010 /* If not present */ -+#define _PAGE_RW 0x020 -+#define _PAGE_USER 0x040 -+#define _PAGE_ACCESSED 0x080 -+#define _PAGE_DIRTY 0x100 - - #define REGION_MASK 0xf0000000 - #define REGION_SHIFT 28 -@@ -143,7 +144,8 @@ - - #define BAD_PAGETABLE __bad_pagetable() - #define BAD_PAGE __bad_page() --#define ZERO_PAGE(vaddr) (virt_to_page(empty_zero_page)) -+ -+#define ZERO_PAGE(vaddr) virt_to_page(empty_zero_page) - - /* number of bits that fit into a memory pointer */ - #define BITS_PER_PTR (8*sizeof(unsigned long)) -@@ -164,9 +166,6 @@ - - #define pte_clear(xp) do { pte_val(*(xp)) = _PAGE_NEWPAGE; } while (0) - --#define phys_region_index(x) (((x) & REGION_MASK) >> REGION_SHIFT) 
--#define pte_region_index(x) phys_region_index(pte_val(x)) -- - #define pmd_none(x) (!(pmd_val(x) & ~_PAGE_NEWPAGE)) - #define pmd_bad(x) ((pmd_val(x) & (~PAGE_MASK & ~_PAGE_USER)) != _KERNPG_TABLE) - #define pmd_present(x) (pmd_val(x) & _PAGE_PRESENT) -@@ -188,19 +187,25 @@ - - #define pages_to_mb(x) ((x) >> (20-PAGE_SHIFT)) - --extern struct page *pte_mem_map(pte_t pte); --extern struct page *phys_mem_map(unsigned long phys); --extern unsigned long phys_to_pfn(unsigned long p); --extern unsigned long pfn_to_phys(unsigned long pfn); -- --#define pte_page(x) pfn_to_page(pte_pfn(x)) --#define pte_address(x) (__va(pte_val(x) & PAGE_MASK)) --#define mk_phys(a, r) ((a) + (r << REGION_SHIFT)) --#define phys_addr(p) ((p) & ~REGION_MASK) --#define phys_page(p) (phys_mem_map(p) + ((phys_addr(p)) >> PAGE_SHIFT)) -+#define pte_page(pte) phys_to_page(pte_val(pte)) -+#define pmd_page(pmd) phys_to_page(pmd_val(pmd) & PAGE_MASK) -+ - #define pte_pfn(x) phys_to_pfn(pte_val(x)) - #define pfn_pte(pfn, prot) __pte(pfn_to_phys(pfn) | pgprot_val(prot)) --#define pfn_pmd(pfn, prot) __pmd(pfn_to_phys(pfn) | pgprot_val(prot)) -+ -+extern struct page *phys_to_page(const unsigned long phys); -+extern struct page *__virt_to_page(const unsigned long virt); -+#define virt_to_page(addr) __virt_to_page((const unsigned long) addr) -+ -+/* -+ * Bits 0 through 3 are taken -+ */ -+#define PTE_FILE_MAX_BITS 28 -+ -+#define pte_to_pgoff(pte) ((pte).pte_low >> 4) -+ -+#define pgoff_to_pte(off) \ -+ ((pte_t) { ((off) << 4) + _PAGE_FILE }) - - static inline pte_t pte_mknewprot(pte_t pte) - { -@@ -235,6 +240,12 @@ - * The following only work if pte_present() is true. - * Undefined behaviour if not.. 
- */ -+static inline int pte_user(pte_t pte) -+{ -+ return((pte_val(pte) & _PAGE_USER) && -+ !(pte_val(pte) & _PAGE_PROTNONE)); -+} -+ - static inline int pte_read(pte_t pte) - { - return((pte_val(pte) & _PAGE_USER) && -@@ -252,6 +263,14 @@ - !(pte_val(pte) & _PAGE_PROTNONE)); - } - -+/* -+ * The following only works if pte_present() is not true. -+ */ -+static inline int pte_file(pte_t pte) -+{ -+ return (pte).pte_low & _PAGE_FILE; -+} -+ - static inline int pte_dirty(pte_t pte) { return pte_val(pte) & _PAGE_DIRTY; } - static inline int pte_young(pte_t pte) { return pte_val(pte) & _PAGE_ACCESSED; } - static inline int pte_newpage(pte_t pte) { return pte_val(pte) & _PAGE_NEWPAGE; } -@@ -334,14 +353,7 @@ - * and a page entry and page directory to the page they refer to. - */ - --#define mk_pte(page, pgprot) \ --({ \ -- pte_t __pte; \ -- \ -- pte_val(__pte) = page_to_phys(page) + pgprot_val(pgprot);\ -- if(pte_present(__pte)) pte_mknewprot(pte_mknewpage(__pte)); \ -- __pte; \ --}) -+extern pte_t mk_pte(struct page *page, pgprot_t pgprot); - - static inline pte_t pte_modify(pte_t pte, pgprot_t newprot) - { -@@ -351,17 +363,27 @@ - } - - #define pmd_page_kernel(pmd) ((unsigned long) __va(pmd_val(pmd) & PAGE_MASK)) --#define pmd_page(pmd) (phys_mem_map(pmd_val(pmd) & PAGE_MASK) + \ -- ((phys_addr(pmd_val(pmd)) >> PAGE_SHIFT))) - --/* to find an entry in a page-table-directory. 
*/ -+/* -+ * the pgd page can be thought of an array like this: pgd_t[PTRS_PER_PGD] -+ * -+ * this macro returns the index of the entry in the pgd page which would -+ * control the given virtual address -+ */ - #define pgd_index(address) ((address >> PGDIR_SHIFT) & (PTRS_PER_PGD-1)) - --/* to find an entry in a page-table-directory */ -+/* -+ * pgd_offset() returns a (pgd_t *) -+ * pgd_index() is used get the offset into the pgd page's array of pgd_t's; -+ */ - #define pgd_offset(mm, address) \ - ((mm)->pgd + ((address) >> PGDIR_SHIFT)) - --/* to find an entry in a kernel page-table-directory */ -+ -+/* -+ * a shortcut which implies the use of the kernel's pgd, instead -+ * of a process's -+ */ - #define pgd_offset_k(address) pgd_offset(&init_mm, address) - - #define pmd_index(address) \ -@@ -373,7 +395,12 @@ - return (pmd_t *) dir; - } - --/* Find an entry in the third-level page table.. */ -+/* -+ * the pte page can be thought of an array like this: pte_t[PTRS_PER_PTE] -+ * -+ * this macro returns the index of the entry in the pte page which would -+ * control the given virtual address -+ */ - #define pte_index(address) (((address) >> PAGE_SHIFT) & (PTRS_PER_PTE - 1)) - #define pte_offset_kernel(dir, address) \ - ((pte_t *) pmd_page_kernel(*(dir)) + pte_index(address)) -@@ -387,11 +414,11 @@ - #define update_mmu_cache(vma,address,pte) do ; while (0) - - /* Encode and de-code a swap entry */ --#define __swp_type(x) (((x).val >> 3) & 0x7f) --#define __swp_offset(x) ((x).val >> 10) -+#define __swp_type(x) (((x).val >> 4) & 0x3f) -+#define __swp_offset(x) ((x).val >> 11) - - #define __swp_entry(type, offset) \ -- ((swp_entry_t) { ((type) << 3) | ((offset) << 10) }) -+ ((swp_entry_t) { ((type) << 4) | ((offset) << 11) }) - #define __pte_to_swp_entry(pte) \ - ((swp_entry_t) { pte_val(pte_mkuptodate(pte)) }) - #define __swp_entry_to_pte(x) ((pte_t) { (x).val }) -Index: uml-2.6.7/arch/um/Kconfig_char -=================================================================== 
---- uml-2.6.7.orig/arch/um/Kconfig_char 2004-07-16 19:36:33.679026472 +0300 -+++ uml-2.6.7/arch/um/Kconfig_char 2004-07-16 19:47:24.069152144 +0300 -@@ -108,11 +108,60 @@ - - config UNIX98_PTYS - bool "Unix98 PTY support" -- --config UNIX98_PTY_COUNT -- int "Maximum number of Unix98 PTYs in use (0-2048)" -- depends on UNIX98_PTYS -+ ---help--- -+ A pseudo terminal (PTY) is a software device consisting of two -+ halves: a master and a slave. The slave device behaves identical to -+ a physical terminal; the master device is used by a process to -+ read data from and write data to the slave, thereby emulating a -+ terminal. Typical programs for the master side are telnet servers -+ and xterms. -+ -+ Linux has traditionally used the BSD-like names /dev/ptyxx for -+ masters and /dev/ttyxx for slaves of pseudo terminals. This scheme -+ has a number of problems. The GNU C library glibc 2.1 and later, -+ however, supports the Unix98 naming standard: in order to acquire a -+ pseudo terminal, a process opens /dev/ptmx; the number of the pseudo -+ terminal is then made available to the process and the pseudo -+ terminal slave can be accessed as /dev/pts/. What was -+ traditionally /dev/ttyp2 will then be /dev/pts/2, for example. -+ -+ All modern Linux systems use the Unix98 ptys. Say Y unless -+ you're on an embedded system and want to conserve memory. -+ -+config LEGACY_PTYS -+ bool "Legacy (BSD) PTY support" -+ default y -+ ---help--- -+ A pseudo terminal (PTY) is a software device consisting of two -+ halves: a master and a slave. The slave device behaves identical to -+ a physical terminal; the master device is used by a process to -+ read data from and write data to the slave, thereby emulating a -+ terminal. Typical programs for the master side are telnet servers -+ and xterms. -+ -+ Linux has traditionally used the BSD-like names /dev/ptyxx -+ for masters and /dev/ttyxx for slaves of pseudo -+ terminals. This scheme has a number of problems, including -+ security. 
This option enables these legacy devices; on most -+ systems, it is safe to say N. -+ -+ -+config LEGACY_PTY_COUNT -+ int "Maximum number of legacy PTY in use" -+ depends on LEGACY_PTYS - default "256" -+ ---help--- -+ The maximum number of legacy PTYs that can be used at any one time. -+ The default is 256, and should be more than enough. Embedded -+ systems may want to reduce this to save memory. -+ -+ When not in use, each legacy PTY occupies 12 bytes on 32-bit -+ architectures and 24 bytes on 64-bit architectures. -+ -+#config UNIX98_PTY_COUNT -+# int "Maximum number of Unix98 PTYs in use (0-2048)" -+# depends on UNIX98_PTYS -+# default "256" - - config WATCHDOG - bool "Watchdog Timer Support" -Index: uml-2.6.7/arch/um/kernel/user_util.c -=================================================================== ---- uml-2.6.7.orig/arch/um/kernel/user_util.c 2004-07-16 19:36:07.233046872 +0300 -+++ uml-2.6.7/arch/um/kernel/user_util.c 2004-07-16 19:47:24.794041944 +0300 -@@ -5,7 +5,6 @@ - - #include - #include --#include - #include - #include - #include -@@ -82,10 +81,10 @@ - int status, ret; - - while(1){ -- if(((ret = waitpid(pid, &status, WUNTRACED)) < 0) || -+ CATCH_EINTR(ret = waitpid(pid, &status, WUNTRACED)); -+ if((ret < 0) || - !WIFSTOPPED(status) || (WSTOPSIG(status) != sig)){ - if(ret < 0){ -- if(errno == EINTR) continue; - printk("wait failed, errno = %d\n", - errno); - } -@@ -119,29 +118,36 @@ - } - } - --int clone_and_wait(int (*fn)(void *), void *arg, void *sp, int flags) --{ -- int pid; -- -- pid = clone(fn, sp, flags, arg); -- if(pid < 0) return(-1); -- wait_for_stop(pid, SIGSTOP, PTRACE_CONT, NULL); -- ptrace(PTRACE_CONT, pid, 0, 0); -- return(pid); --} -- --int raw(int fd, int complain) -+int __raw(int fd, int complain, int now) - { - struct termios tt; - int err; -+ int when; -+ -+ CATCH_EINTR(err = tcgetattr(fd, &tt)); -+ -+ if (err < 0) { -+ if (complain) -+ printk("tcgetattr failed, errno = %d\n", errno); -+ return(-errno); -+ } - -- 
tcgetattr(fd, &tt); - cfmakeraw(&tt); -- err = tcsetattr(fd, TCSANOW, &tt); -- if((err < 0) && complain){ -- printk("tcsetattr failed, errno = %d\n", errno); -+ -+ if (now) -+ when = TCSANOW; -+ else -+ when = TCSADRAIN; -+ -+ CATCH_EINTR(err = tcsetattr(fd, when, &tt)); -+ -+ if (err < 0) { -+ if (complain) -+ printk("tcsetattr failed, errno = %d\n", errno); - return(-errno); - } -+ /*XXX: tcsetattr could have applied only some changes -+ * (and cfmakeraw() is a set of changes) */ - return(0); - } - -Index: uml-2.6.7/arch/um/kernel/umid.c -=================================================================== ---- uml-2.6.7.orig/arch/um/kernel/umid.c 2004-07-16 19:37:31.857182048 +0300 -+++ uml-2.6.7/arch/um/kernel/umid.c 2004-07-16 19:47:23.756199720 +0300 -@@ -5,7 +5,6 @@ - - #include - #include --#include - #include - #include - #include -@@ -33,18 +32,19 @@ - static int umid_is_random = 1; - static int umid_inited = 0; - --static int make_umid(void); -+static int make_umid(int (*printer)(const char *fmt, ...)); - --static int __init set_umid(char *name, int is_random) -+static int __init set_umid(char *name, int is_random, -+ int (*printer)(const char *fmt, ...)) - { - if(umid_inited){ -- printk("Unique machine name can't be set twice\n"); -+ (*printer)("Unique machine name can't be set twice\n"); - return(-1); - } - - if(strlen(name) > UMID_LEN - 1) -- printk("Unique machine name is being truncated to %s " -- "characters\n", UMID_LEN); -+ (*printer)("Unique machine name is being truncated to %s " -+ "characters\n", UMID_LEN); - strlcpy(umid, name, sizeof(umid)); - - umid_is_random = is_random; -@@ -54,7 +54,7 @@ - - static int __init set_umid_arg(char *name, int *add) - { -- return(set_umid(name, 0)); -+ return(set_umid(name, 0, printf)); - } - - __uml_setup("umid=", set_umid_arg, -@@ -67,7 +67,7 @@ - { - int n; - -- if(!umid_inited && make_umid()) return(-1); -+ if(!umid_inited && make_umid(printk)) return(-1); - - n = strlen(uml_dir) + strlen(umid) + 
strlen(name) + 1; - if(n > len){ -@@ -85,22 +85,23 @@ - { - char file[strlen(uml_dir) + UMID_LEN + sizeof("/pid\0")]; - char pid[sizeof("nnnnn\0")]; -- int fd; -+ int fd, n; - - if(umid_file_name("pid", file, sizeof(file))) return 0; - - fd = os_open_file(file, of_create(of_excl(of_rdwr(OPENFLAGS()))), - 0644); - if(fd < 0){ -- printk("Open of machine pid file \"%s\" failed - " -- "errno = %d\n", file, -fd); -+ printf("Open of machine pid file \"%s\" failed - " -+ "err = %d\n", file, -fd); - return 0; - } - - sprintf(pid, "%d\n", os_getpid()); -- if(write(fd, pid, strlen(pid)) != strlen(pid)) -- printk("Write of pid file failed - errno = %d\n", errno); -- close(fd); -+ n = os_write_file(fd, pid, strlen(pid)); -+ if(n != strlen(pid)) -+ printf("Write of pid file failed - err = %d\n", -n); -+ os_close_file(fd); - return 0; - } - -@@ -111,7 +112,8 @@ - int len; - char file[256]; - -- if((directory = opendir(dir)) == NULL){ -+ directory = opendir(dir); -+ if(directory == NULL){ - printk("actually_do_remove : couldn't open directory '%s', " - "errno = %d\n", dir, errno); - return(1); -@@ -160,22 +162,24 @@ - { - char file[strlen(uml_dir) + UMID_LEN + sizeof("/pid\0")]; - char pid[sizeof("nnnnn\0")], *end; -- int dead, fd, p; -+ int dead, fd, p, n; - - sprintf(file, "%s/pid", dir); - dead = 0; -- if((fd = os_open_file(file, of_read(OPENFLAGS()), 0)) < 0){ -+ fd = os_open_file(file, of_read(OPENFLAGS()), 0); -+ if(fd < 0){ - if(fd != -ENOENT){ - printk("not_dead_yet : couldn't open pid file '%s', " -- "errno = %d\n", file, -fd); -+ "err = %d\n", file, -fd); - return(1); - } - dead = 1; - } - if(fd > 0){ -- if(read(fd, pid, sizeof(pid)) < 0){ -+ n = os_read_file(fd, pid, sizeof(pid)); -+ if(n < 0){ - printk("not_dead_yet : couldn't read pid file '%s', " -- "errno = %d\n", file, errno); -+ "err = %d\n", file, -n); - return(1); - } - p = strtoul(pid, &end, 0); -@@ -197,7 +201,7 @@ - if((strlen(name) > 0) && (name[strlen(name) - 1] != '/')){ - uml_dir = malloc(strlen(name) + 
1); - if(uml_dir == NULL){ -- printk("Failed to malloc uml_dir - error = %d\n", -+ printf("Failed to malloc uml_dir - error = %d\n", - errno); - uml_dir = name; - return(0); -@@ -217,7 +221,7 @@ - char *home = getenv("HOME"); - - if(home == NULL){ -- printk("make_uml_dir : no value in environment for " -+ printf("make_uml_dir : no value in environment for " - "$HOME\n"); - exit(1); - } -@@ -232,57 +236,59 @@ - dir[len + 1] = '\0'; - } - -- if((uml_dir = malloc(strlen(dir) + 1)) == NULL){ -+ uml_dir = malloc(strlen(dir) + 1); -+ if(uml_dir == NULL){ - printf("make_uml_dir : malloc failed, errno = %d\n", errno); - exit(1); - } - strcpy(uml_dir, dir); - - if((mkdir(uml_dir, 0777) < 0) && (errno != EEXIST)){ -- printk("Failed to mkdir %s - errno = %i\n", uml_dir, errno); -+ printf("Failed to mkdir %s - errno = %i\n", uml_dir, errno); - return(-1); - } - return 0; - } - --static int __init make_umid(void) -+static int __init make_umid(int (*printer)(const char *fmt, ...)) - { - int fd, err; - char tmp[strlen(uml_dir) + UMID_LEN + 1]; - - strlcpy(tmp, uml_dir, sizeof(tmp)); - -- if(*umid == 0){ -+ if(!umid_inited){ - strcat(tmp, "XXXXXX"); - fd = mkstemp(tmp); - if(fd < 0){ -- printk("make_umid - mkstemp failed, errno = %d\n", -- errno); -+ (*printer)("make_umid - mkstemp failed, errno = %d\n", -+ errno); - return(1); - } - -- close(fd); -+ os_close_file(fd); - /* There's a nice tiny little race between this unlink and - * the mkdir below. It'd be nice if there were a mkstemp - * for directories. 
- */ - unlink(tmp); -- set_umid(&tmp[strlen(uml_dir)], 1); -+ set_umid(&tmp[strlen(uml_dir)], 1, printer); - } - - sprintf(tmp, "%s%s", uml_dir, umid); - -- if((err = mkdir(tmp, 0777)) < 0){ -+ err = mkdir(tmp, 0777); -+ if(err < 0){ - if(errno == EEXIST){ - if(not_dead_yet(tmp)){ -- printk("umid '%s' is in use\n", umid); -+ (*printer)("umid '%s' is in use\n", umid); - return(-1); - } - err = mkdir(tmp, 0777); - } - } - if(err < 0){ -- printk("Failed to create %s - errno = %d\n", umid, errno); -+ (*printer)("Failed to create %s - errno = %d\n", umid, errno); - return(-1); - } - -@@ -295,7 +301,13 @@ - ); - - __uml_postsetup(make_uml_dir); --__uml_postsetup(make_umid); -+ -+static int __init make_umid_setup(void) -+{ -+ return(make_umid(printf)); -+} -+ -+__uml_postsetup(make_umid_setup); - __uml_postsetup(create_pid_file); - - /* -Index: uml-2.6.7/include/linux/ghash.h -=================================================================== ---- uml-2.6.7.orig/include/linux/ghash.h 2004-07-16 19:47:23.863183456 +0300 -+++ uml-2.6.7/include/linux/ghash.h 2004-07-16 19:47:23.875181632 +0300 -@@ -0,0 +1,236 @@ -+/* -+ * include/linux/ghash.h -- generic hashing with fuzzy retrieval -+ * -+ * (C) 1997 Thomas Schoebel-Theuer -+ * -+ * The algorithms implemented here seem to be a completely new invention, -+ * and I'll publish the fundamentals in a paper. -+ */ -+ -+#ifndef _GHASH_H -+#define _GHASH_H -+/* HASHSIZE _must_ be a power of two!!! 
*/ -+ -+ -+#define DEF_HASH_FUZZY_STRUCTS(NAME,HASHSIZE,TYPE) \ -+\ -+struct NAME##_table {\ -+ TYPE * hashtable[HASHSIZE];\ -+ TYPE * sorted_list;\ -+ int nr_entries;\ -+};\ -+\ -+struct NAME##_ptrs {\ -+ TYPE * next_hash;\ -+ TYPE * prev_hash;\ -+ TYPE * next_sorted;\ -+ TYPE * prev_sorted;\ -+}; -+ -+#define DEF_HASH_FUZZY(LINKAGE,NAME,HASHSIZE,TYPE,PTRS,KEYTYPE,KEY,KEYCMP,KEYEQ,HASHFN)\ -+\ -+LINKAGE void insert_##NAME##_hash(struct NAME##_table * tbl, TYPE * elem)\ -+{\ -+ int ix = HASHFN(elem->KEY);\ -+ TYPE ** base = &tbl->hashtable[ix];\ -+ TYPE * ptr = *base;\ -+ TYPE * prev = NULL;\ -+\ -+ tbl->nr_entries++;\ -+ while(ptr && KEYCMP(ptr->KEY, elem->KEY)) {\ -+ base = &ptr->PTRS.next_hash;\ -+ prev = ptr;\ -+ ptr = *base;\ -+ }\ -+ elem->PTRS.next_hash = ptr;\ -+ elem->PTRS.prev_hash = prev;\ -+ if(ptr) {\ -+ ptr->PTRS.prev_hash = elem;\ -+ }\ -+ *base = elem;\ -+\ -+ ptr = prev;\ -+ if(!ptr) {\ -+ ptr = tbl->sorted_list;\ -+ prev = NULL;\ -+ } else {\ -+ prev = ptr->PTRS.prev_sorted;\ -+ }\ -+ while(ptr) {\ -+ TYPE * next = ptr->PTRS.next_hash;\ -+ if(next && KEYCMP(next->KEY, elem->KEY)) {\ -+ prev = ptr;\ -+ ptr = next;\ -+ } else if(KEYCMP(ptr->KEY, elem->KEY)) {\ -+ prev = ptr;\ -+ ptr = ptr->PTRS.next_sorted;\ -+ } else\ -+ break;\ -+ }\ -+ elem->PTRS.next_sorted = ptr;\ -+ elem->PTRS.prev_sorted = prev;\ -+ if(ptr) {\ -+ ptr->PTRS.prev_sorted = elem;\ -+ }\ -+ if(prev) {\ -+ prev->PTRS.next_sorted = elem;\ -+ } else {\ -+ tbl->sorted_list = elem;\ -+ }\ -+}\ -+\ -+LINKAGE void remove_##NAME##_hash(struct NAME##_table * tbl, TYPE * elem)\ -+{\ -+ TYPE * next = elem->PTRS.next_hash;\ -+ TYPE * prev = elem->PTRS.prev_hash;\ -+\ -+ tbl->nr_entries--;\ -+ if(next)\ -+ next->PTRS.prev_hash = prev;\ -+ if(prev)\ -+ prev->PTRS.next_hash = next;\ -+ else {\ -+ int ix = HASHFN(elem->KEY);\ -+ tbl->hashtable[ix] = next;\ -+ }\ -+\ -+ next = elem->PTRS.next_sorted;\ -+ prev = elem->PTRS.prev_sorted;\ -+ if(next)\ -+ next->PTRS.prev_sorted = prev;\ -+ if(prev)\ 
-+ prev->PTRS.next_sorted = next;\ -+ else\ -+ tbl->sorted_list = next;\ -+}\ -+\ -+LINKAGE TYPE * find_##NAME##_hash(struct NAME##_table * tbl, KEYTYPE pos)\ -+{\ -+ int ix = hashfn(pos);\ -+ TYPE * ptr = tbl->hashtable[ix];\ -+ while(ptr && KEYCMP(ptr->KEY, pos))\ -+ ptr = ptr->PTRS.next_hash;\ -+ if(ptr && !KEYEQ(ptr->KEY, pos))\ -+ ptr = NULL;\ -+ return ptr;\ -+}\ -+\ -+LINKAGE TYPE * find_##NAME##_hash_fuzzy(struct NAME##_table * tbl, KEYTYPE pos)\ -+{\ -+ int ix;\ -+ int offset;\ -+ TYPE * ptr;\ -+ TYPE * next;\ -+\ -+ ptr = tbl->sorted_list;\ -+ if(!ptr || KEYCMP(pos, ptr->KEY))\ -+ return NULL;\ -+ ix = HASHFN(pos);\ -+ offset = HASHSIZE;\ -+ do {\ -+ offset >>= 1;\ -+ next = tbl->hashtable[(ix+offset) & ((HASHSIZE)-1)];\ -+ if(next && (KEYCMP(next->KEY, pos) || KEYEQ(next->KEY, pos))\ -+ && KEYCMP(ptr->KEY, next->KEY))\ -+ ptr = next;\ -+ } while(offset);\ -+\ -+ for(;;) {\ -+ next = ptr->PTRS.next_hash;\ -+ if(next) {\ -+ if(KEYCMP(next->KEY, pos)) {\ -+ ptr = next;\ -+ continue;\ -+ }\ -+ }\ -+ next = ptr->PTRS.next_sorted;\ -+ if(next && KEYCMP(next->KEY, pos)) {\ -+ ptr = next;\ -+ continue;\ -+ }\ -+ return ptr;\ -+ }\ -+ return NULL;\ -+} -+ -+/* LINKAGE - empty or "static", depending on whether you want the definitions to -+ * be public or not -+ * NAME - a string to stick in names to make this hash table type distinct from -+ * any others -+ * HASHSIZE - number of buckets -+ * TYPE - type of data contained in the buckets - must be a structure, one -+ * field is of type NAME_ptrs, another is the hash key -+ * PTRS - TYPE must contain a field of type NAME_ptrs, PTRS is the name of that -+ * field -+ * KEYTYPE - type of the key field within TYPE -+ * KEY - name of the key field within TYPE -+ * KEYCMP - pointer to function that compares KEYTYPEs to each other - the -+ * prototype is int KEYCMP(KEYTYPE, KEYTYPE), it returns zero for equal, -+ * non-zero for not equal -+ * HASHFN - the hash function - the prototype is int HASHFN(KEYTYPE), -+ * it 
returns a number in the range 0 ... HASHSIZE - 1 -+ * Call DEF_HASH_STRUCTS, define your hash table as a NAME_table, then call -+ * DEF_HASH. -+ */ -+ -+#define DEF_HASH_STRUCTS(NAME,HASHSIZE,TYPE) \ -+\ -+struct NAME##_table {\ -+ TYPE * hashtable[HASHSIZE];\ -+ int nr_entries;\ -+};\ -+\ -+struct NAME##_ptrs {\ -+ TYPE * next_hash;\ -+ TYPE * prev_hash;\ -+}; -+ -+#define DEF_HASH(LINKAGE,NAME,TYPE,PTRS,KEYTYPE,KEY,KEYCMP,HASHFN)\ -+\ -+LINKAGE void insert_##NAME##_hash(struct NAME##_table * tbl, TYPE * elem)\ -+{\ -+ int ix = HASHFN(elem->KEY);\ -+ TYPE ** base = &tbl->hashtable[ix];\ -+ TYPE * ptr = *base;\ -+ TYPE * prev = NULL;\ -+\ -+ tbl->nr_entries++;\ -+ while(ptr && KEYCMP(ptr->KEY, elem->KEY)) {\ -+ base = &ptr->PTRS.next_hash;\ -+ prev = ptr;\ -+ ptr = *base;\ -+ }\ -+ elem->PTRS.next_hash = ptr;\ -+ elem->PTRS.prev_hash = prev;\ -+ if(ptr) {\ -+ ptr->PTRS.prev_hash = elem;\ -+ }\ -+ *base = elem;\ -+}\ -+\ -+LINKAGE void remove_##NAME##_hash(struct NAME##_table * tbl, TYPE * elem)\ -+{\ -+ TYPE * next = elem->PTRS.next_hash;\ -+ TYPE * prev = elem->PTRS.prev_hash;\ -+\ -+ tbl->nr_entries--;\ -+ if(next)\ -+ next->PTRS.prev_hash = prev;\ -+ if(prev)\ -+ prev->PTRS.next_hash = next;\ -+ else {\ -+ int ix = HASHFN(elem->KEY);\ -+ tbl->hashtable[ix] = next;\ -+ }\ -+}\ -+\ -+LINKAGE TYPE * find_##NAME##_hash(struct NAME##_table * tbl, KEYTYPE pos)\ -+{\ -+ int ix = HASHFN(pos);\ -+ TYPE * ptr = tbl->hashtable[ix];\ -+ while(ptr && KEYCMP(ptr->KEY, pos))\ -+ ptr = ptr->PTRS.next_hash;\ -+ return ptr;\ -+} -+ -+#endif -Index: uml-2.6.7/arch/um/kernel/tt/include/mode.h -=================================================================== ---- uml-2.6.7.orig/arch/um/kernel/tt/include/mode.h 2004-07-16 19:36:59.998025376 +0300 -+++ uml-2.6.7/arch/um/kernel/tt/include/mode.h 2004-07-16 19:47:23.743201696 +0300 -@@ -8,6 +8,8 @@ - - #include "sysdep/ptrace.h" - -+enum { OP_NONE, OP_EXEC, OP_FORK, OP_TRACE_ON, OP_REBOOT, OP_HALT, OP_CB }; -+ - extern int 
tracing_pid; - - extern int tracer(int (*init_proc)(void *), void *sp); -Index: uml-2.6.7/arch/um/include/os.h -=================================================================== ---- uml-2.6.7.orig/arch/um/include/os.h 2004-07-16 19:36:10.807503472 +0300 -+++ uml-2.6.7/arch/um/include/os.h 2004-07-16 19:47:23.705207472 +0300 -@@ -17,6 +17,32 @@ - #define OS_TYPE_FIFO 6 - #define OS_TYPE_SOCK 7 - -+/* os_access() flags */ -+#define OS_ACC_F_OK 0 /* Test for existence. */ -+#define OS_ACC_X_OK 1 /* Test for execute permission. */ -+#define OS_ACC_W_OK 2 /* Test for write permission. */ -+#define OS_ACC_R_OK 4 /* Test for read permission. */ -+#define OS_ACC_RW_OK (OS_ACC_W_OK | OS_ACC_R_OK) /* Test for RW permission */ -+ -+/* -+ * types taken from stat_file() in hostfs_user.c -+ * (if they are wrong here, they are wrong there...). -+ */ -+struct uml_stat { -+ int ust_dev; /* device */ -+ unsigned long long ust_ino; /* inode */ -+ int ust_mode; /* protection */ -+ int ust_nlink; /* number of hard links */ -+ int ust_uid; /* user ID of owner */ -+ int ust_gid; /* group ID of owner */ -+ unsigned long long ust_size; /* total size, in bytes */ -+ int ust_blksize; /* blocksize for filesystem I/O */ -+ unsigned long long ust_blocks; /* number of blocks allocated */ -+ unsigned long ust_atime; /* time of last access */ -+ unsigned long ust_mtime; /* time of last modification */ -+ unsigned long ust_ctime; /* time of last change */ -+}; -+ - struct openflags { - unsigned int r : 1; - unsigned int w : 1; -@@ -84,29 +110,47 @@ - flags.e = 1; - return(flags); - } -- -+ - static inline struct openflags of_cloexec(struct openflags flags) - { - flags.cl = 1; - return(flags); - } - -+extern int os_stat_file(const char *file_name, struct uml_stat *buf); -+extern int os_stat_fd(const int fd, struct uml_stat *buf); -+extern int os_access(const char *file, int mode); -+extern void os_print_error(int error, const char* str); -+extern int os_get_exec_close(int fd, int *close_on_exec); 
-+extern int os_set_exec_close(int fd, int close_on_exec); -+extern int os_ioctl_generic(int fd, unsigned int cmd, unsigned long arg); -+extern int os_window_size(int fd, int *rows, int *cols); -+extern int os_new_tty_pgrp(int fd, int pid); -+extern int os_get_ifname(int fd, char *namebuf); -+extern int os_set_slip(int fd); -+extern int os_set_owner(int fd, int pid); -+extern int os_sigio_async(int master, int slave); -+extern int os_mode_fd(int fd, int mode); -+ - extern int os_seek_file(int fd, __u64 offset); - extern int os_open_file(char *file, struct openflags flags, int mode); - extern int os_read_file(int fd, void *buf, int len); --extern int os_write_file(int fd, void *buf, int count); -+extern int os_write_file(int fd, const void *buf, int count); - extern int os_file_size(char *file, long long *size_out); -+extern int os_file_modtime(char *file, unsigned long *modtime); - extern int os_pipe(int *fd, int stream, int close_on_exec); - extern int os_set_fd_async(int fd, int owner); - extern int os_set_fd_block(int fd, int blocking); - extern int os_accept_connection(int fd); -+extern int os_create_unix_socket(char *file, int len, int close_on_exec); - extern int os_shutdown_socket(int fd, int r, int w); - extern void os_close_file(int fd); - extern int os_rcv_fd(int fd, int *helper_pid_out); --extern int create_unix_socket(char *file, int len); -+extern int create_unix_socket(char *file, int len, int close_on_exec); - extern int os_connect_socket(char *name); - extern int os_file_type(char *file); - extern int os_file_mode(char *file, struct openflags *mode_out); -+extern int os_lock_file(int fd, int excl); - - extern unsigned long os_process_pc(int pid); - extern int os_process_parent(int pid); -@@ -115,11 +159,12 @@ - extern void os_usr1_process(int pid); - extern int os_getpid(void); - --extern int os_map_memory(void *virt, int fd, unsigned long off, -+extern int os_map_memory(void *virt, int fd, unsigned long long off, - unsigned long len, int r, int w, 
int x); - extern int os_protect_memory(void *addr, unsigned long len, - int r, int w, int x); - extern int os_unmap_memory(void *addr, int len); -+extern void os_flush_stdout(void); - - #endif - -Index: uml-2.6.7/arch/um/kernel/tt/exec_user.c -=================================================================== ---- uml-2.6.7.orig/arch/um/kernel/tt/exec_user.c 2004-07-16 19:37:20.241947832 +0300 -+++ uml-2.6.7/arch/um/kernel/tt/exec_user.c 2004-07-16 19:47:24.799041184 +0300 -@@ -19,13 +19,18 @@ - void do_exec(int old_pid, int new_pid) - { - unsigned long regs[FRAME_SIZE]; -+ int err; - - if((ptrace(PTRACE_ATTACH, new_pid, 0, 0) < 0) || -- (ptrace(PTRACE_CONT, new_pid, 0, 0) < 0) || -- (waitpid(new_pid, 0, WUNTRACED) < 0)) -+ (ptrace(PTRACE_CONT, new_pid, 0, 0) < 0)) - tracer_panic("do_exec failed to attach proc - errno = %d", - errno); - -+ CATCH_EINTR(err = waitpid(new_pid, 0, WUNTRACED)); -+ if (err < 0) -+ tracer_panic("do_exec failed to attach proc in waitpid - errno = %d", -+ errno); -+ - if(ptrace_getregs(old_pid, regs) < 0) - tracer_panic("do_exec failed to get registers - errno = %d", - errno); -Index: uml-2.6.7/include/asm-um/timex.h -=================================================================== ---- uml-2.6.7.orig/include/asm-um/timex.h 2004-07-16 19:36:59.398116576 +0300 -+++ uml-2.6.7/include/asm-um/timex.h 2004-07-16 19:47:23.803192576 +0300 -@@ -1,8 +1,6 @@ - #ifndef __UM_TIMEX_H - #define __UM_TIMEX_H - --#include "linux/time.h" -- - typedef unsigned long cycles_t; - - #define cacheflush_time (0) -Index: uml-2.6.7/arch/um/kernel/skas/include/skas.h -=================================================================== ---- uml-2.6.7.orig/arch/um/kernel/skas/include/skas.h 2004-07-16 19:36:19.759142616 +0300 -+++ uml-2.6.7/arch/um/kernel/skas/include/skas.h 2004-07-16 19:47:23.727204128 +0300 -@@ -8,7 +8,7 @@ - - #include "sysdep/ptrace.h" - --extern int userspace_pid; -+extern int userspace_pid[]; - - extern void switch_threads(void *me, void 
*next); - extern void thread_wait(void *sw, void *fb); -@@ -32,7 +32,7 @@ - extern int new_mm(int from); - extern void save_registers(union uml_pt_regs *regs); - extern void restore_registers(union uml_pt_regs *regs); --extern void start_userspace(void); -+extern void start_userspace(int cpu); - extern void init_registers(int pid); - - #endif -Index: uml-2.6.7/arch/um/drivers/mmapper_kern.c -=================================================================== ---- uml-2.6.7.orig/arch/um/drivers/mmapper_kern.c 2004-07-16 19:36:04.585449368 +0300 -+++ uml-2.6.7/arch/um/drivers/mmapper_kern.c 2004-07-16 19:47:23.688210056 +0300 -@@ -120,7 +120,10 @@ - printk(KERN_INFO "Mapper v0.1\n"); - - v_buf = (char *) find_iomem("mmapper", &mmapper_size); -- if(mmapper_size == 0) return(0); -+ if(mmapper_size == 0){ -+ printk(KERN_ERR "mmapper_init - find_iomem failed\n"); -+ return(0); -+ } - - p_buf = __pa(v_buf); - -Index: uml-2.6.7/arch/um/os-Linux/process.c -=================================================================== ---- uml-2.6.7.orig/arch/um/os-Linux/process.c 2004-07-16 19:36:02.092828304 +0300 -+++ uml-2.6.7/arch/um/os-Linux/process.c 2004-07-16 19:47:24.802040728 +0300 -@@ -1,5 +1,5 @@ - /* -- * Copyright (C) 2002 Jeff Dike (jdike@karaya.com) -+ * Copyright (C) 2002 Jeff Dike (jdike@addtoit.com) - * Licensed under the GPL - */ - -@@ -7,32 +7,38 @@ - #include - #include - #include -+#include - #include - #include - #include "os.h" - #include "user.h" -+#include "user_util.h" -+ -+#define ARBITRARY_ADDR -1 -+#define FAILURE_PID -1 - - unsigned long os_process_pc(int pid) - { - char proc_stat[sizeof("/proc/#####/stat\0")], buf[256]; - unsigned long pc; -- int fd; -+ int fd, err; - - sprintf(proc_stat, "/proc/%d/stat", pid); - fd = os_open_file(proc_stat, of_read(OPENFLAGS()), 0); - if(fd < 0){ -- printk("os_process_pc - couldn't open '%s', errno = %d\n", -- proc_stat, errno); -- return(-1); -+ printk("os_process_pc - couldn't open '%s', err = %d\n", -+ proc_stat, 
-fd); -+ return(ARBITRARY_ADDR); - } -- if(read(fd, buf, sizeof(buf)) < 0){ -- printk("os_process_pc - couldn't read '%s', errno = %d\n", -- proc_stat, errno); -- close(fd); -- return(-1); -+ err = os_read_file(fd, buf, sizeof(buf)); -+ if(err < 0){ -+ printk("os_process_pc - couldn't read '%s', err = %d\n", -+ proc_stat, -err); -+ os_close_file(fd); -+ return(ARBITRARY_ADDR); - } -- close(fd); -- pc = -1; -+ os_close_file(fd); -+ pc = ARBITRARY_ADDR; - if(sscanf(buf, "%*d %*s %*c %*d %*d %*d %*d %*d %*d %*d %*d " - "%*d %*d %*d %*d %*d %*d %*d %*d %*d %*d %*d %*d %*d %*d " - "%*d %*d %*d %*d %ld", &pc) != 1){ -@@ -52,22 +58,23 @@ - snprintf(stat, sizeof(stat), "/proc/%d/stat", pid); - fd = os_open_file(stat, of_read(OPENFLAGS()), 0); - if(fd < 0){ -- printk("Couldn't open '%s', errno = %d\n", stat, -fd); -- return(-1); -+ printk("Couldn't open '%s', err = %d\n", stat, -fd); -+ return(FAILURE_PID); - } - -- n = read(fd, data, sizeof(data)); -- close(fd); -+ n = os_read_file(fd, data, sizeof(data)); -+ os_close_file(fd); - - if(n < 0){ -- printk("Couldn't read '%s', errno = %d\n", stat); -- return(-1); -+ printk("Couldn't read '%s', err = %d\n", stat, -n); -+ return(FAILURE_PID); - } - -- parent = -1; -+ parent = FAILURE_PID; - /* XXX This will break if there is a space in the command */ - n = sscanf(data, "%*d %*s %*c %d", &parent); -- if(n != 1) printk("Failed to scan '%s'\n", data); -+ if(n != 1) -+ printk("Failed to scan '%s'\n", data); - - return(parent); - } -@@ -81,13 +88,17 @@ - { - kill(pid, SIGKILL); - if(reap_child) -- waitpid(pid, NULL, 0); -+ CATCH_EINTR(waitpid(pid, NULL, 0)); - - } - - void os_usr1_process(int pid) - { -+#ifdef __NR_tkill -+ syscall(__NR_tkill, pid, SIGUSR1); -+#else - kill(pid, SIGUSR1); -+#endif - } - - int os_getpid(void) -@@ -95,7 +106,7 @@ - return(getpid()); - } - --int os_map_memory(void *virt, int fd, unsigned long off, unsigned long len, -+int os_map_memory(void *virt, int fd, unsigned long long off, unsigned long len, - int 
r, int w, int x) - { - void *loc; -@@ -104,8 +115,8 @@ - prot = (r ? PROT_READ : 0) | (w ? PROT_WRITE : 0) | - (x ? PROT_EXEC : 0); - -- loc = mmap((void *) virt, len, prot, MAP_SHARED | MAP_FIXED, -- fd, off); -+ loc = mmap64((void *) virt, len, prot, MAP_SHARED | MAP_FIXED, -+ fd, off); - if(loc == MAP_FAILED) - return(-errno); - return(0); -@@ -126,7 +137,8 @@ - int err; - - err = munmap(addr, len); -- if(err < 0) return(-errno); -+ if(err < 0) -+ return(-errno); - return(0); - } - -Index: uml-2.6.7/arch/um/include/mem_user.h -=================================================================== ---- uml-2.6.7.orig/arch/um/include/mem_user.h 2004-07-16 19:37:02.661620448 +0300 -+++ uml-2.6.7/arch/um/include/mem_user.h 2004-07-16 19:47:23.704207624 +0300 -@@ -32,43 +32,38 @@ - #ifndef _MEM_USER_H - #define _MEM_USER_H - --struct mem_region { -+struct iomem_region { -+ struct iomem_region *next; - char *driver; -- unsigned long start_pfn; -- unsigned long start; -- unsigned long len; -- void *mem_map; - int fd; -+ int size; -+ unsigned long phys; -+ unsigned long virt; - }; - --extern struct mem_region *regions[]; --extern struct mem_region physmem_region; -+extern struct iomem_region *iomem_regions; -+extern int iomem_size; - - #define ROUND_4M(n) ((((unsigned long) (n)) + (1 << 22)) & ~((1 << 22) - 1)) - - extern unsigned long host_task_size; - extern unsigned long task_size; - -+extern void check_devanon(void); - extern int init_mem_user(void); - extern int create_mem_file(unsigned long len); --extern void setup_range(int fd, char *driver, unsigned long start, -- unsigned long pfn, unsigned long total, int need_vm, -- struct mem_region *region, void *reserved); - extern void setup_memory(void *entry); - extern unsigned long find_iomem(char *driver, unsigned long *len_out); --extern int init_maps(struct mem_region *region); --extern int nregions(void); --extern int reserve_vm(unsigned long start, unsigned long end, void *e); -+extern int init_maps(unsigned long 
physmem, unsigned long iomem, -+ unsigned long highmem); - extern unsigned long get_vm(unsigned long len); - extern void setup_physmem(unsigned long start, unsigned long usable, -- unsigned long len); --extern int setup_region(struct mem_region *region, void *entry); -+ unsigned long len, unsigned long highmem); - extern void add_iomem(char *name, int fd, unsigned long size); --extern struct mem_region *phys_region(unsigned long phys); - extern unsigned long phys_offset(unsigned long phys); - extern void unmap_physmem(void); --extern int map_memory(unsigned long virt, unsigned long phys, -- unsigned long len, int r, int w, int x); -+extern void map_memory(unsigned long virt, unsigned long phys, -+ unsigned long len, int r, int w, int x); - extern int protect_memory(unsigned long addr, unsigned long len, - int r, int w, int x, int must_succeed); - extern unsigned long get_kmem_end(void); -Index: uml-2.6.7/arch/um/drivers/mcast_user.c -=================================================================== ---- uml-2.6.7.orig/arch/um/drivers/mcast_user.c 2004-07-16 19:36:45.956160064 +0300 -+++ uml-2.6.7/arch/um/drivers/mcast_user.c 2004-07-16 19:47:23.686210360 +0300 -@@ -23,6 +23,7 @@ - #include "kern_util.h" - #include "user_util.h" - #include "user.h" -+#include "os.h" - - #define MAX_PACKET (ETH_MAX_PACKET + ETH_HEADER_OTHER) - -@@ -62,7 +63,8 @@ - goto out; - } - -- if ((fd = socket(AF_INET, SOCK_DGRAM, 0)) < 0){ -+ fd = socket(AF_INET, SOCK_DGRAM, 0); -+ if (fd < 0){ - printk("mcast_open : data socket failed, errno = %d\n", - errno); - fd = -ENOMEM; -@@ -72,7 +74,7 @@ - if (setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &yes, sizeof(yes)) < 0) { - printk("mcast_open: SO_REUSEADDR failed, errno = %d\n", - errno); -- close(fd); -+ os_close_file(fd); - fd = -EINVAL; - goto out; - } -@@ -82,7 +84,7 @@ - sizeof(pri->ttl)) < 0) { - printk("mcast_open: IP_MULTICAST_TTL failed, error = %d\n", - errno); -- close(fd); -+ os_close_file(fd); - fd = -EINVAL; - goto out; - } -@@ 
-91,7 +93,7 @@ - if (setsockopt(fd, SOL_IP, IP_MULTICAST_LOOP, &yes, sizeof(yes)) < 0) { - printk("mcast_open: IP_MULTICAST_LOOP failed, error = %d\n", - errno); -- close(fd); -+ os_close_file(fd); - fd = -EINVAL; - goto out; - } -@@ -99,7 +101,7 @@ - /* bind socket to mcast address */ - if (bind(fd, (struct sockaddr *) sin, sizeof(*sin)) < 0) { - printk("mcast_open : data bind failed, errno = %d\n", errno); -- close(fd); -+ os_close_file(fd); - fd = -EINVAL; - goto out; - } -@@ -115,7 +117,7 @@ - "interface on the host.\n"); - printk("eth0 should be configured in order to use the " - "multicast transport.\n"); -- close(fd); -+ os_close_file(fd); - fd = -EINVAL; - } - -@@ -137,7 +139,7 @@ - errno); - } - -- close(fd); -+ os_close_file(fd); - } - - int mcast_user_write(int fd, void *buf, int len, struct mcast_data *pri) -Index: uml-2.6.7/arch/um/drivers/slirp_user.c -=================================================================== ---- uml-2.6.7.orig/arch/um/drivers/slirp_user.c 2004-07-16 19:37:02.720611480 +0300 -+++ uml-2.6.7/arch/um/drivers/slirp_user.c 2004-07-16 19:47:24.797041488 +0300 -@@ -4,8 +4,7 @@ - #include - #include - #include --#include --#include -+#include - #include - #include - #include "user_util.h" -@@ -48,15 +47,15 @@ - - return(pid); - } -- -+ -+/* XXX This is just a trivial wrapper around os_pipe */ - static int slirp_datachan(int *mfd, int *sfd) - { - int fds[2], err; - - err = os_pipe(fds, 1, 1); -- if(err){ -- printk("slirp_datachan: Failed to open pipe, errno = %d\n", -- -err); -+ if(err < 0){ -+ printk("slirp_datachan: Failed to open pipe, err = %d\n", -err); - return(err); - } - -@@ -77,7 +76,7 @@ - pid = slirp_tramp(pri->argw.argv, sfd); - - if(pid < 0){ -- printk("slirp_tramp failed - errno = %d\n", pid); -+ printk("slirp_tramp failed - errno = %d\n", -pid); - os_close_file(sfd); - os_close_file(mfd); - return(pid); -@@ -97,8 +96,8 @@ - struct slirp_data *pri = data; - int status,err; - -- close(fd); -- close(pri->slave); -+ 
os_close_file(fd); -+ os_close_file(pri->slave); - - pri->slave = -1; - -@@ -114,13 +113,13 @@ - } - #endif - -- err = waitpid(pri->pid, &status, WNOHANG); -- if(err<0) { -+ CATCH_EINTR(err = waitpid(pri->pid, &status, WNOHANG)); -+ if(err < 0) { - printk("slirp_close: waitpid returned %d\n", errno); - return; - } - -- if(err==0) { -+ if(err == 0) { - printk("slirp_close: process %d has not exited\n"); - return; - } -Index: uml-2.6.7/arch/um/kernel/tempfile.c -=================================================================== ---- uml-2.6.7.orig/arch/um/kernel/tempfile.c 2004-07-16 19:36:48.252810920 +0300 -+++ uml-2.6.7/arch/um/kernel/tempfile.c 2004-07-16 19:47:23.740202152 +0300 -@@ -28,6 +28,7 @@ - } - if((dir == NULL) || (*dir == '\0')) - dir = "/tmp"; -+ - tempdir = malloc(strlen(dir) + 2); - if(tempdir == NULL){ - fprintf(stderr, "Failed to malloc tempdir, " -@@ -49,7 +50,8 @@ - else - *tempname = 0; - strcat(tempname, template); -- if((fd = mkstemp(tempname)) < 0){ -+ fd = mkstemp(tempname); -+ if(fd < 0){ - fprintf(stderr, "open - cannot create %s: %s\n", tempname, - strerror(errno)); - return -1; -@@ -59,7 +61,8 @@ - return -1; - } - if(out_tempname){ -- if((*out_tempname = strdup(tempname)) == NULL){ -+ *out_tempname = strdup(tempname); -+ if(*out_tempname == NULL){ - perror("strdup"); - return -1; - } -Index: uml-2.6.7/arch/um/sys-i386/util/Makefile -=================================================================== ---- uml-2.6.7.orig/arch/um/sys-i386/util/Makefile 2004-07-16 19:37:40.976795656 +0300 -+++ uml-2.6.7/arch/um/sys-i386/util/Makefile 2004-07-16 19:47:23.773197136 +0300 -@@ -1,15 +1,10 @@ -+host-progs := mk_sc mk_thread -+always := $(host-progs) - --host-progs := mk_sc --always := $(host-progs) mk_thread --targets := mk_thread_kern.o mk_thread_user.o -+mk_thread-objs := mk_thread_kern.o mk_thread_user.o - --mk_sc-objs := mk_sc.o -- --$(obj)/mk_thread : $(obj)/mk_thread_kern.o $(obj)/mk_thread_user.o -- $(CC) $(CFLAGS) -o $@ $^ -- 
--$(obj)/mk_thread_user.o : $(src)/mk_thread_user.c -- $(CC) $(USER_CFLAGS) -c -o $@ $< -+HOSTCFLAGS_mk_thread_kern.o := $(CFLAGS) $(CPPFLAGS) -+HOSTCFLAGS_mk_thread_user.o := $(USER_CFLAGS) - - clean : - $(RM) -f $(build-targets) -Index: uml-2.6.7/arch/um/kernel/tt/tracer.c -=================================================================== ---- uml-2.6.7.orig/arch/um/kernel/tt/tracer.c 2004-07-16 19:35:55.561821168 +0300 -+++ uml-2.6.7/arch/um/kernel/tt/tracer.c 2004-07-16 19:47:24.801040880 +0300 -@@ -39,16 +39,17 @@ - return(0); - - register_winch_irq(tracer_winch[0], fd, -1, data); -- return(0); -+ return(1); - } - - static void tracer_winch_handler(int sig) - { -+ int n; - char c = 1; - -- if(write(tracer_winch[1], &c, sizeof(c)) != sizeof(c)) -- printk("tracer_winch_handler - write failed, errno = %d\n", -- errno); -+ n = os_write_file(tracer_winch[1], &c, sizeof(c)); -+ if(n != sizeof(c)) -+ printk("tracer_winch_handler - write failed, err = %d\n", -n); - } - - /* Called only by the tracing thread during initialization */ -@@ -58,9 +59,8 @@ - int err; - - err = os_pipe(tracer_winch, 1, 1); -- if(err){ -- printk("setup_tracer_winch : os_pipe failed, errno = %d\n", -- -err); -+ if(err < 0){ -+ printk("setup_tracer_winch : os_pipe failed, err = %d\n", -err); - return; - } - signal(SIGWINCH, tracer_winch_handler); -@@ -130,8 +130,8 @@ - case SIGTSTP: - if(ptrace(PTRACE_CONT, pid, 0, sig) < 0) - tracer_panic("sleeping_process_signal : Failed to " -- "continue pid %d, errno = %d\n", pid, -- sig); -+ "continue pid %d, signal = %d, " -+ "errno = %d\n", pid, sig, errno); - break; - - /* This happens when the debugger (e.g. 
strace) is doing system call -@@ -145,7 +145,7 @@ - if(ptrace(PTRACE_SYSCALL, pid, 0, 0) < 0) - tracer_panic("sleeping_process_signal : Failed to " - "PTRACE_SYSCALL pid %d, errno = %d\n", -- pid, sig); -+ pid, errno); - break; - case SIGSTOP: - break; -@@ -192,7 +192,7 @@ - printf("tracing thread pid = %d\n", tracing_pid); - - pid = clone(signal_tramp, sp, CLONE_FILES | SIGCHLD, init_proc); -- n = waitpid(pid, &status, WUNTRACED); -+ CATCH_EINTR(n = waitpid(pid, &status, WUNTRACED)); - if(n < 0){ - printf("waitpid on idle thread failed, errno = %d\n", errno); - exit(1); -@@ -218,7 +218,7 @@ - err = attach(debugger_parent); - if(err){ - printf("Failed to attach debugger parent %d, " -- "errno = %d\n", debugger_parent, err); -+ "errno = %d\n", debugger_parent, -err); - debugger_parent = -1; - } - else { -@@ -233,7 +233,8 @@ - } - set_cmdline("(tracing thread)"); - while(1){ -- if((pid = waitpid(-1, &status, WUNTRACED)) <= 0){ -+ CATCH_EINTR(pid = waitpid(-1, &status, WUNTRACED)); -+ if(pid <= 0){ - if(errno != ECHILD){ - printf("wait failed - errno = %d\n", errno); - } -@@ -401,7 +402,7 @@ - - if(!strcmp(line, "go")) debug_stop = 0; - else if(!strcmp(line, "parent")) debug_parent = 1; -- else printk("Unknown debug option : '%s'\n", line); -+ else printf("Unknown debug option : '%s'\n", line); - - line = next; - } -Index: uml-2.6.7/arch/um/drivers/Makefile -=================================================================== ---- uml-2.6.7.orig/arch/um/drivers/Makefile 2004-07-16 19:36:51.389334096 +0300 -+++ uml-2.6.7/arch/um/drivers/Makefile 2004-07-16 19:47:24.526082680 +0300 -@@ -1,5 +1,5 @@ - # --# Copyright (C) 2000, 2002 Jeff Dike (jdike@karaya.com) -+# Copyright (C) 2000, 2002, 2003 Jeff Dike (jdike@karaya.com) - # Licensed under the GPL - # - -@@ -15,7 +15,7 @@ - #pcap-objs := pcap_kern.o pcap_user.o $(PCAP) - net-objs := net_kern.o net_user.o - mconsole-objs := mconsole_kern.o mconsole_user.o --hostaudio-objs := hostaudio_kern.o hostaudio_user.o 
-+hostaudio-objs := hostaudio_kern.o - ubd-objs := ubd_kern.o ubd_user.o - port-objs := port_kern.o port_user.o - harddog-objs := harddog_kern.o harddog_user.o -@@ -39,6 +39,8 @@ - obj-$(CONFIG_TTY_CHAN) += tty.o - obj-$(CONFIG_XTERM_CHAN) += xterm.o xterm_kern.o - obj-$(CONFIG_UML_WATCHDOG) += harddog.o -+obj-$(CONFIG_BLK_DEV_COW) += cow_kern.o -+obj-$(CONFIG_BLK_DEV_COW_COMMON) += cow_user.o - - obj-y += stdio_console.o $(CHAN_OBJS) - -@@ -46,18 +48,7 @@ - - USER_OBJS := $(filter %_user.o,$(obj-y) $(obj-m) $(USER_SINGLE_OBJS)) fd.o \ - null.o pty.o tty.o xterm.o --USER_OBJS := $(foreach file,$(USER_OBJS),arch/um/drivers/$(file)) -+USER_OBJS := $(foreach file,$(USER_OBJS),$(obj)/$(file)) - - $(USER_OBJS) : %.o: %.c - $(CC) $(CFLAGS_$(notdir $@)) $(USER_CFLAGS) -c -o $@ $< -- --clean: -- --modules: -- --fastdep: -- --dep: -- --archmrproper: clean -- -Index: uml-2.6.7/arch/um/kernel/tt/uaccess.c -=================================================================== ---- uml-2.6.7.orig/arch/um/kernel/tt/uaccess.c 2004-07-16 19:47:23.625219632 +0300 -+++ uml-2.6.7/arch/um/kernel/tt/uaccess.c 2004-07-16 19:47:23.751200480 +0300 -@@ -0,0 +1,73 @@ -+/* -+ * Copyright (C) 2000 - 2003 Jeff Dike (jdike@addtoit.com) -+ * Licensed under the GPL -+ */ -+ -+#include "linux/sched.h" -+#include "asm/uaccess.h" -+ -+int copy_from_user_tt(void *to, const void *from, int n) -+{ -+ if(!access_ok_tt(VERIFY_READ, from, n)) -+ return(n); -+ -+ return(__do_copy_from_user(to, from, n, &current->thread.fault_addr, -+ &current->thread.fault_catcher)); -+} -+ -+int copy_to_user_tt(void *to, const void *from, int n) -+{ -+ if(!access_ok_tt(VERIFY_WRITE, to, n)) -+ return(n); -+ -+ return(__do_copy_to_user(to, from, n, &current->thread.fault_addr, -+ &current->thread.fault_catcher)); -+} -+ -+int strncpy_from_user_tt(char *dst, const char *src, int count) -+{ -+ int n; -+ -+ if(!access_ok_tt(VERIFY_READ, src, 1)) -+ return(-EFAULT); -+ -+ n = __do_strncpy_from_user(dst, src, count, -+ &current->thread.fault_addr, -+ 
&current->thread.fault_catcher); -+ if(n < 0) return(-EFAULT); -+ return(n); -+} -+ -+int __clear_user_tt(void *mem, int len) -+{ -+ return(__do_clear_user(mem, len, -+ &current->thread.fault_addr, -+ &current->thread.fault_catcher)); -+} -+ -+int clear_user_tt(void *mem, int len) -+{ -+ if(!access_ok_tt(VERIFY_WRITE, mem, len)) -+ return(len); -+ -+ return(__do_clear_user(mem, len, &current->thread.fault_addr, -+ &current->thread.fault_catcher)); -+} -+ -+int strnlen_user_tt(const void *str, int len) -+{ -+ return(__do_strnlen_user(str, len, -+ &current->thread.fault_addr, -+ &current->thread.fault_catcher)); -+} -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -Index: uml-2.6.7/arch/um/kernel/skas/util/Makefile -=================================================================== ---- uml-2.6.7.orig/arch/um/kernel/skas/util/Makefile 2004-07-16 19:37:20.411921992 +0300 -+++ uml-2.6.7/arch/um/kernel/skas/util/Makefile 2004-07-16 19:47:23.735202912 +0300 -@@ -1,10 +1,9 @@ - all: mk_ptregs - - mk_ptregs : mk_ptregs.o -- $(CC) -o mk_ptregs mk_ptregs.o -+ $(HOSTCC) -o mk_ptregs mk_ptregs.o - - mk_ptregs.o : mk_ptregs.c -- $(CC) -c $< -+ $(HOSTCC) -c $< - --clean : -- $(RM) -f mk_ptregs *.o *~ -+clean-files := mk_ptregs *.o *~ -Index: uml-2.6.7/arch/um/kernel/skas/sys-i386/Makefile -=================================================================== ---- uml-2.6.7.orig/arch/um/kernel/skas/sys-i386/Makefile 2004-07-16 19:37:26.083059848 +0300 -+++ uml-2.6.7/arch/um/kernel/skas/sys-i386/Makefile 2004-07-16 19:47:23.733203216 +0300 -@@ -10,5 +10,3 @@ - - $(USER_OBJS) : %.o: %.c - $(CC) $(CFLAGS_$(notdir $@)) $(USER_CFLAGS) -c -o $@ $< -- --clean : -Index: 
uml-2.6.7/arch/um/kernel/sigio_kern.c -=================================================================== ---- uml-2.6.7.orig/arch/um/kernel/sigio_kern.c 2004-07-16 19:36:16.630618224 +0300 -+++ uml-2.6.7/arch/um/kernel/sigio_kern.c 2004-07-16 19:47:23.725204432 +0300 -@@ -6,18 +6,21 @@ - #include "linux/kernel.h" - #include "linux/list.h" - #include "linux/slab.h" --#include "asm/irq.h" -+#include "linux/signal.h" -+#include "linux/interrupt.h" - #include "init.h" - #include "sigio.h" - #include "irq_user.h" -+#include "irq_kern.h" - - /* Protected by sigio_lock() called from write_sigio_workaround */ - static int sigio_irq_fd = -1; - --void sigio_interrupt(int irq, void *data, struct pt_regs *unused) -+irqreturn_t sigio_interrupt(int irq, void *data, struct pt_regs *unused) - { - read_sigio_fd(sigio_irq_fd); - reactivate_fd(sigio_irq_fd, SIGIO_WRITE_IRQ); -+ return(IRQ_HANDLED); - } - - int write_sigio_irq(int fd) -Index: uml-2.6.7/include/asm-um/system-generic.h -=================================================================== ---- uml-2.6.7.orig/include/asm-um/system-generic.h 2004-07-16 19:37:46.190003128 +0300 -+++ uml-2.6.7/include/asm-um/system-generic.h 2004-07-16 19:47:23.802192728 +0300 -@@ -23,8 +23,10 @@ - extern void block_signals(void); - extern void unblock_signals(void); - --#define local_save_flags(flags) do { (flags) = get_signals(); } while(0) --#define local_irq_restore(flags) do { set_signals(flags); } while(0) -+#define local_save_flags(flags) do { typecheck(unsigned long, flags); \ -+ (flags) = get_signals(); } while(0) -+#define local_irq_restore(flags) do { typecheck(unsigned long, flags); \ -+ set_signals(flags); } while(0) - - #define local_irq_save(flags) do { local_save_flags(flags); \ - local_irq_disable(); } while(0) -@@ -39,4 +41,7 @@ - (flags == 0); \ - }) - -+extern void *_switch_to(void *prev, void *next, void *last); -+#define switch_to(prev, next, last) prev = _switch_to(prev, next, last) -+ - #endif -Index: 
uml-2.6.7/include/asm-um/uaccess.h -=================================================================== ---- uml-2.6.7.orig/include/asm-um/uaccess.h 2004-07-16 19:37:05.104249112 +0300 -+++ uml-2.6.7/include/asm-um/uaccess.h 2004-07-16 19:47:23.804192424 +0300 -@@ -6,6 +6,8 @@ - #ifndef __UM_UACCESS_H - #define __UM_UACCESS_H - -+#include "linux/sched.h" -+ - #define VERIFY_READ 0 - #define VERIFY_WRITE 1 - -Index: uml-2.6.7/arch/um/util/Makefile -=================================================================== ---- uml-2.6.7.orig/arch/um/util/Makefile 2004-07-16 19:37:43.611395136 +0300 -+++ uml-2.6.7/arch/um/util/Makefile 2004-07-16 19:47:23.780196072 +0300 -@@ -1,23 +1,8 @@ --always := mk_task mk_constants --targets := mk_task_user.o mk_task_kern.o \ -- mk_constants_user.o mk_constants_kern.o -+host-progs := mk_task mk_constants -+always := $(host-progs) - --$(obj)/mk_task: $(obj)/mk_task_user.o $(obj)/mk_task_kern.o -- $(CC) -o $@ $^ -+mk_task-objs := mk_task_user.o mk_task_kern.o -+mk_constants-objs := mk_constants_user.o mk_constants_kern.o - --$(obj)/mk_task_user.o: $(src)/mk_task_user.c -- $(CC) -o $@ -c $< -- --$(obj)/mk_constants : $(obj)/mk_constants_user.o $(obj)/mk_constants_kern.o -- $(CC) -o $@ $^ -- --$(obj)/mk_constants_user.o : $(src)/mk_constants_user.c -- $(CC) -c $< -o $@ -- --$(obj)/mk_constants_kern.o : $(src)/mk_constants_kern.c -- $(CC) $(CFLAGS) -c $< -o $@ -- --clean: -- $(RM) $(build-targets) -- --archmrproper: -+HOSTCFLAGS_mk_task_kern.o := $(CFLAGS) $(CPPFLAGS) -+HOSTCFLAGS_mk_constants_kern.o := $(CFLAGS) $(CPPFLAGS) -Index: uml-2.6.7/arch/um/os-Linux/file.c -=================================================================== ---- uml-2.6.7.orig/arch/um/os-Linux/file.c 2004-07-16 19:37:43.530407448 +0300 -+++ uml-2.6.7/arch/um/os-Linux/file.c 2004-07-16 19:47:23.767198048 +0300 -@@ -8,6 +8,8 @@ - #include - #include - #include -+#include -+#include - #include - #include - #include -@@ -17,33 +19,235 @@ - #include "user.h" - 
#include "kern_util.h" - --int os_file_type(char *file) -+static void copy_stat(struct uml_stat *dst, struct stat64 *src) -+{ -+ *dst = ((struct uml_stat) { -+ .ust_dev = src->st_dev, /* device */ -+ .ust_ino = src->st_ino, /* inode */ -+ .ust_mode = src->st_mode, /* protection */ -+ .ust_nlink = src->st_nlink, /* number of hard links */ -+ .ust_uid = src->st_uid, /* user ID of owner */ -+ .ust_gid = src->st_gid, /* group ID of owner */ -+ .ust_size = src->st_size, /* total size, in bytes */ -+ .ust_blksize = src->st_blksize, /* blocksize for filesys I/O */ -+ .ust_blocks = src->st_blocks, /* number of blocks allocated */ -+ .ust_atime = src->st_atime, /* time of last access */ -+ .ust_mtime = src->st_mtime, /* time of last modification */ -+ .ust_ctime = src->st_ctime, /* time of last change */ -+ }); -+} -+ -+int os_stat_fd(const int fd, struct uml_stat *ubuf) -+{ -+ struct stat64 sbuf; -+ int err; -+ -+ do { -+ err = fstat64(fd, &sbuf); -+ } while((err < 0) && (errno == EINTR)) ; -+ -+ if(err < 0) -+ return(-errno); -+ -+ if(ubuf != NULL) -+ copy_stat(ubuf, &sbuf); -+ return(err); -+} -+ -+int os_stat_file(const char *file_name, struct uml_stat *ubuf) -+{ -+ struct stat64 sbuf; -+ int err; -+ -+ do { -+ err = stat64(file_name, &sbuf); -+ } while((err < 0) && (errno == EINTR)) ; -+ -+ if(err < 0) -+ return(-errno); -+ -+ if(ubuf != NULL) -+ copy_stat(ubuf, &sbuf); -+ return(err); -+} -+ -+int os_access(const char* file, int mode) -+{ -+ int amode, err; -+ -+ amode=(mode&OS_ACC_R_OK ? R_OK : 0) | (mode&OS_ACC_W_OK ? W_OK : 0) | -+ (mode&OS_ACC_X_OK ? X_OK : 0) | (mode&OS_ACC_F_OK ? F_OK : 0) ; -+ -+ err = access(file, amode); -+ if(err < 0) -+ return(-errno); -+ -+ return(0); -+} -+ -+void os_print_error(int error, const char* str) -+{ -+ errno = error < 0 ? -error : error; -+ -+ perror(str); -+} -+ -+/* FIXME? 
required only by hostaudio (because it passes ioctls verbatim) */ -+int os_ioctl_generic(int fd, unsigned int cmd, unsigned long arg) -+{ -+ int err; -+ -+ err = ioctl(fd, cmd, arg); -+ if(err < 0) -+ return(-errno); -+ -+ return(err); -+} -+ -+int os_window_size(int fd, int *rows, int *cols) -+{ -+ struct winsize size; -+ -+ if(ioctl(fd, TIOCGWINSZ, &size) < 0) -+ return(-errno); -+ -+ *rows = size.ws_row; -+ *cols = size.ws_col; -+ -+ return(0); -+} -+ -+int os_new_tty_pgrp(int fd, int pid) - { -- struct stat64 buf; -+ if(ioctl(fd, TIOCSCTTY, 0) < 0){ -+ printk("TIOCSCTTY failed, errno = %d\n", errno); -+ return(-errno); -+ } -+ -+ if(tcsetpgrp(fd, pid) < 0){ -+ printk("tcsetpgrp failed, errno = %d\n", errno); -+ return(-errno); -+ } -+ -+ return(0); -+} -+ -+/* FIXME: ensure namebuf in os_get_if_name is big enough */ -+int os_get_ifname(int fd, char* namebuf) -+{ -+ if(ioctl(fd, SIOCGIFNAME, namebuf) < 0) -+ return(-errno); -+ -+ return(0); -+} -+ -+int os_set_slip(int fd) -+{ -+ int disc, sencap; -+ -+ disc = N_SLIP; -+ if(ioctl(fd, TIOCSETD, &disc) < 0){ -+ printk("Failed to set slip line discipline - " -+ "errno = %d\n", errno); -+ return(-errno); -+ } -+ -+ sencap = 0; -+ if(ioctl(fd, SIOCSIFENCAP, &sencap) < 0){ -+ printk("Failed to set slip encapsulation - " -+ "errno = %d\n", errno); -+ return(-errno); -+ } -+ -+ return(0); -+} -+ -+int os_set_owner(int fd, int pid) -+{ -+ if(fcntl(fd, F_SETOWN, pid) < 0){ -+ int save_errno = errno; -+ -+ if(fcntl(fd, F_GETOWN, 0) != pid) -+ return(-save_errno); -+ } -+ -+ return(0); -+} -+ -+/* FIXME? 
moved wholesale from sigio_user.c to get fcntls out of that file */ -+int os_sigio_async(int master, int slave) -+{ -+ int flags; - -- if(stat64(file, &buf) == -1) -+ flags = fcntl(master, F_GETFL); -+ if(flags < 0) { -+ printk("fcntl F_GETFL failed, errno = %d\n", errno); - return(-errno); -+ } -+ -+ if((fcntl(master, F_SETFL, flags | O_NONBLOCK | O_ASYNC) < 0) || -+ (fcntl(master, F_SETOWN, os_getpid()) < 0)){ -+ printk("fcntl F_SETFL or F_SETOWN failed, errno = %d\n", errno); -+ return(-errno); -+ } -+ -+ if((fcntl(slave, F_SETFL, flags | O_NONBLOCK) < 0)){ -+ printk("fcntl F_SETFL failed, errno = %d\n", errno); -+ return(-errno); -+ } - -- if(S_ISDIR(buf.st_mode)) return(OS_TYPE_DIR); -- else if(S_ISLNK(buf.st_mode)) return(OS_TYPE_SYMLINK); -- else if(S_ISCHR(buf.st_mode)) return(OS_TYPE_CHARDEV); -- else if(S_ISBLK(buf.st_mode)) return(OS_TYPE_BLOCKDEV); -- else if(S_ISFIFO(buf.st_mode)) return(OS_TYPE_FIFO); -- else if(S_ISSOCK(buf.st_mode)) return(OS_TYPE_SOCK); -+ return(0); -+} -+ -+int os_mode_fd(int fd, int mode) -+{ -+ int err; -+ -+ do { -+ err = fchmod(fd, mode); -+ } while((err < 0) && (errno==EINTR)) ; -+ -+ if(err < 0) -+ return(-errno); -+ -+ return(0); -+} -+ -+int os_file_type(char *file) -+{ -+ struct uml_stat buf; -+ int err; -+ -+ err = os_stat_file(file, &buf); -+ if(err < 0) -+ return(err); -+ -+ if(S_ISDIR(buf.ust_mode)) return(OS_TYPE_DIR); -+ else if(S_ISLNK(buf.ust_mode)) return(OS_TYPE_SYMLINK); -+ else if(S_ISCHR(buf.ust_mode)) return(OS_TYPE_CHARDEV); -+ else if(S_ISBLK(buf.ust_mode)) return(OS_TYPE_BLOCKDEV); -+ else if(S_ISFIFO(buf.ust_mode)) return(OS_TYPE_FIFO); -+ else if(S_ISSOCK(buf.ust_mode)) return(OS_TYPE_SOCK); - else return(OS_TYPE_FILE); - } - - int os_file_mode(char *file, struct openflags *mode_out) - { -+ int err; -+ - *mode_out = OPENFLAGS(); - -- if(!access(file, W_OK)) *mode_out = of_write(*mode_out); -- else if(errno != EACCES) -- return(-errno); -+ err = os_access(file, OS_ACC_W_OK); -+ if((err < 0) && (err != 
-EACCES)) -+ return(err); - -- if(!access(file, R_OK)) *mode_out = of_read(*mode_out); -- else if(errno != EACCES) -- return(-errno); -+ *mode_out = of_write(*mode_out); -+ -+ err = os_access(file, OS_ACC_R_OK); -+ if((err < 0) && (err != -EACCES)) -+ return(err); -+ -+ *mode_out = of_read(*mode_out); - - return(0); - } -@@ -63,16 +267,14 @@ - if(flags.e) f |= O_EXCL; - - fd = open64(file, f, mode); -- if(fd < 0) return(-errno); -- -- if(flags.cl){ -- if(fcntl(fd, F_SETFD, 1)){ -- close(fd); -- return(-errno); -- } -+ if(fd < 0) -+ return(-errno); -+ -+ if(flags.cl && fcntl(fd, F_SETFD, 1)){ -+ os_close_file(fd); -+ return(-errno); - } - -- return(fd); - return(fd); - } - -@@ -90,7 +292,7 @@ - - err = connect(fd, (struct sockaddr *) &sock, sizeof(sock)); - if(err) -- return(err); -+ return(-errno); - - return(fd); - } -@@ -109,88 +311,162 @@ - return(0); - } - --int os_read_file(int fd, void *buf, int len) -+static int fault_buffer(void *start, int len, -+ int (*copy_proc)(void *addr, void *buf, int len)) - { -- int n; -+ int page = getpagesize(), i; -+ char c; - -- /* Force buf into memory if it's not already. 
*/ -+ for(i = 0; i < len; i += page){ -+ if((*copy_proc)(start + i, &c, sizeof(c))) -+ return(-EFAULT); -+ } -+ if((len % page) != 0){ -+ if((*copy_proc)(start + len - 1, &c, sizeof(c))) -+ return(-EFAULT); -+ } -+ return(0); -+} - -- /* XXX This fails if buf is kernel memory */ --#ifdef notdef -- if(copy_to_user_proc(buf, &c, sizeof(c))) -- return(-EFAULT); --#endif -+static int file_io(int fd, void *buf, int len, -+ int (*io_proc)(int fd, void *buf, int len), -+ int (*copy_user_proc)(void *addr, void *buf, int len)) -+{ -+ int n, err; -+ -+ do { -+ n = (*io_proc)(fd, buf, len); -+ if((n < 0) && (errno == EFAULT)){ -+ err = fault_buffer(buf, len, copy_user_proc); -+ if(err) -+ return(err); -+ n = (*io_proc)(fd, buf, len); -+ } -+ } while((n < 0) && (errno == EINTR)); - -- n = read(fd, buf, len); - if(n < 0) - return(-errno); - return(n); - } - --int os_write_file(int fd, void *buf, int count) -+int os_read_file(int fd, void *buf, int len) - { -- int n; -- -- /* Force buf into memory if it's not already. 
*/ -- -- /* XXX This fails if buf is kernel memory */ --#ifdef notdef -- if(copy_to_user_proc(buf, buf, buf[0])) -- return(-EFAULT); --#endif -+ return(file_io(fd, buf, len, (int (*)(int, void *, int)) read, -+ copy_from_user_proc)); -+} - -- n = write(fd, buf, count); -- if(n < 0) -- return(-errno); -- return(n); -+int os_write_file(int fd, const void *buf, int len) -+{ -+ return(file_io(fd, (void *) buf, len, -+ (int (*)(int, void *, int)) write, copy_to_user_proc)); - } - - int os_file_size(char *file, long long *size_out) - { -- struct stat64 buf; -+ struct uml_stat buf; -+ int err; - -- if(stat64(file, &buf) == -1){ -- printk("Couldn't stat \"%s\" : errno = %d\n", file, errno); -- return(-errno); -+ err = os_stat_file(file, &buf); -+ if(err < 0){ -+ printk("Couldn't stat \"%s\" : err = %d\n", file, -err); -+ return(err); - } -- if(S_ISBLK(buf.st_mode)){ -+ -+ if(S_ISBLK(buf.ust_mode)){ - int fd, blocks; - -- if((fd = open64(file, O_RDONLY)) < 0){ -- printk("Couldn't open \"%s\", errno = %d\n", file, -- errno); -- return(-errno); -+ fd = os_open_file(file, of_read(OPENFLAGS()), 0); -+ if(fd < 0){ -+ printk("Couldn't open \"%s\", errno = %d\n", file, -fd); -+ return(fd); - } - if(ioctl(fd, BLKGETSIZE, &blocks) < 0){ - printk("Couldn't get the block size of \"%s\", " - "errno = %d\n", file, errno); -- close(fd); -- return(-errno); -+ err = -errno; -+ os_close_file(fd); -+ return(err); - } - *size_out = ((long long) blocks) * 512; -- close(fd); -+ os_close_file(fd); - return(0); - } -- *size_out = buf.st_size; -+ *size_out = buf.ust_size; -+ return(0); -+} -+ -+int os_file_modtime(char *file, unsigned long *modtime) -+{ -+ struct uml_stat buf; -+ int err; -+ -+ err = os_stat_file(file, &buf); -+ if(err < 0){ -+ printk("Couldn't stat \"%s\" : err = %d\n", file, -err); -+ return(err); -+ } -+ -+ *modtime = buf.ust_mtime; - return(0); - } - -+int os_get_exec_close(int fd, int* close_on_exec) -+{ -+ int ret; -+ -+ do { -+ ret = fcntl(fd, F_GETFD); -+ } while((ret < 0) 
&& (errno == EINTR)) ; -+ -+ if(ret < 0) -+ return(-errno); -+ -+ *close_on_exec = (ret&FD_CLOEXEC) ? 1 : 0; -+ return(ret); -+} -+ -+int os_set_exec_close(int fd, int close_on_exec) -+{ -+ int flag, err; -+ -+ if(close_on_exec) flag = FD_CLOEXEC; -+ else flag = 0; -+ -+ do { -+ err = fcntl(fd, F_SETFD, flag); -+ } while((err < 0) && (errno == EINTR)) ; -+ -+ if(err < 0) -+ return(-errno); -+ return(err); -+} -+ - int os_pipe(int *fds, int stream, int close_on_exec) - { - int err, type = stream ? SOCK_STREAM : SOCK_DGRAM; - - err = socketpair(AF_UNIX, type, 0, fds); -- if(err) -+ if(err < 0) - return(-errno); - - if(!close_on_exec) - return(0); - -- if((fcntl(fds[0], F_SETFD, 1) < 0) || (fcntl(fds[1], F_SETFD, 1) < 0)) -- printk("os_pipe : Setting FD_CLOEXEC failed, errno = %d", -- errno); -+ err = os_set_exec_close(fds[0], 1); -+ if(err < 0) -+ goto error; -+ -+ err = os_set_exec_close(fds[1], 1); -+ if(err < 0) -+ goto error; - - return(0); -+ -+ error: -+ printk("os_pipe : Setting FD_CLOEXEC failed, err = %d\n", -err); -+ os_close_file(fds[1]); -+ os_close_file(fds[0]); -+ return(err); - } - - int os_set_fd_async(int fd, int owner) -@@ -270,7 +546,7 @@ - return(-EINVAL); - } - err = shutdown(fd, what); -- if(err) -+ if(err < 0) - return(-errno); - return(0); - } -@@ -315,7 +591,7 @@ - return(new); - } - --int create_unix_socket(char *file, int len) -+int os_create_unix_socket(char *file, int len, int close_on_exec) - { - struct sockaddr_un addr; - int sock, err; -@@ -327,6 +603,13 @@ - return(-errno); - } - -+ if(close_on_exec) { -+ err = os_set_exec_close(sock, 1); -+ if(err < 0) -+ printk("create_unix_socket : close_on_exec failed, " -+ "err = %d", -err); -+ } -+ - addr.sun_family = AF_UNIX; - - /* XXX Be more careful about overflow */ -@@ -334,14 +617,45 @@ - - err = bind(sock, (struct sockaddr *) &addr, sizeof(addr)); - if (err < 0){ -- printk("create_listening_socket - bind failed, errno = %d\n", -- errno); -+ printk("create_listening_socket at '%s' - bind 
failed, " -+ "errno = %d\n", file, errno); - return(-errno); - } - - return(sock); - } - -+void os_flush_stdout(void) -+{ -+ fflush(stdout); -+} -+ -+int os_lock_file(int fd, int excl) -+{ -+ int type = excl ? F_WRLCK : F_RDLCK; -+ struct flock lock = ((struct flock) { .l_type = type, -+ .l_whence = SEEK_SET, -+ .l_start = 0, -+ .l_len = 0 } ); -+ int err, save; -+ -+ err = fcntl(fd, F_SETLK, &lock); -+ if(!err) -+ goto out; -+ -+ save = -errno; -+ err = fcntl(fd, F_GETLK, &lock); -+ if(err){ -+ err = -errno; -+ goto out; -+ } -+ -+ printk("F_SETLK failed, file already locked by pid %d\n", lock.l_pid); -+ err = save; -+ out: -+ return(err); -+} -+ - /* - * Overrides for Emacs so that we follow Linus's tabbing style. - * Emacs will notice this stuff at the end of the file and automatically -Index: uml-2.6.7/arch/um/kernel/tt/sys-i386/Makefile -=================================================================== ---- uml-2.6.7.orig/arch/um/kernel/tt/sys-i386/Makefile 2004-07-16 19:36:33.266089248 +0300 -+++ uml-2.6.7/arch/um/kernel/tt/sys-i386/Makefile 2004-07-16 19:47:23.750200632 +0300 -@@ -10,5 +10,3 @@ - - $(USER_OBJS) : %.o: %.c - $(CC) $(CFLAGS_$(notdir $@)) $(USER_CFLAGS) -c -o $@ $< -- --clean : -Index: uml-2.6.7/arch/um/kernel/skas/exec_user.c -=================================================================== ---- uml-2.6.7.orig/arch/um/kernel/skas/exec_user.c 2004-07-16 19:36:07.836955064 +0300 -+++ uml-2.6.7/arch/um/kernel/skas/exec_user.c 2004-07-16 19:47:24.798041336 +0300 -@@ -11,6 +11,7 @@ - #include - #include "user.h" - #include "kern_util.h" -+#include "user_util.h" - #include "os.h" - #include "time_user.h" - -@@ -26,7 +27,7 @@ - - int user_thread(unsigned long stack, int flags) - { -- int pid, status; -+ int pid, status, err; - - pid = clone(user_thread_tramp, (void *) stack_sp(stack), - flags | CLONE_FILES | SIGCHLD, NULL); -@@ -35,7 +36,8 @@ - return(pid); - } - -- if(waitpid(pid, &status, WUNTRACED) < 0){ -+ CATCH_EINTR(err = waitpid(pid, 
&status, WUNTRACED)); -+ if(err < 0){ - printk("user_thread - waitpid failed, errno = %d\n", errno); - return(-errno); - } -Index: uml-2.6.7/arch/um/kernel/tt/mem_user.c -=================================================================== ---- uml-2.6.7.orig/arch/um/kernel/tt/mem_user.c 2004-07-16 19:36:16.863582808 +0300 -+++ uml-2.6.7/arch/um/kernel/tt/mem_user.c 2004-07-16 19:47:23.745201392 +0300 -@@ -25,14 +25,13 @@ - size = (unsigned long) segment_end - - (unsigned long) segment_start; - data = create_mem_file(size); -- if((addr = mmap(NULL, size, PROT_WRITE | PROT_READ, -- MAP_SHARED, data, 0)) == MAP_FAILED){ -+ addr = mmap(NULL, size, PROT_WRITE | PROT_READ, MAP_SHARED, data, 0); -+ if(addr == MAP_FAILED){ - perror("mapping new data segment"); - exit(1); - } - memcpy(addr, segment_start, size); -- if(switcheroo(data, prot, addr, segment_start, -- size) < 0){ -+ if(switcheroo(data, prot, addr, segment_start, size) < 0){ - printf("switcheroo failed\n"); - exit(1); - } -Index: uml-2.6.7/arch/um/kernel/ksyms.c -=================================================================== ---- uml-2.6.7.orig/arch/um/kernel/ksyms.c 2004-07-16 19:35:55.815782560 +0300 -+++ uml-2.6.7/arch/um/kernel/ksyms.c 2004-07-16 19:47:24.866031000 +0300 -@@ -1,5 +1,5 @@ - /* -- * Copyright (C) 2001, 2002 Jeff Dike (jdike@karaya.com) -+ * Copyright (C) 2001 - 2004 Jeff Dike (jdike@addtoit.com) - * Licensed under the GPL - */ - -@@ -8,7 +8,7 @@ - #include "linux/string.h" - #include "linux/smp_lock.h" - #include "linux/spinlock.h" --#include -+#include "linux/highmem.h" - #include "asm/current.h" - #include "asm/delay.h" - #include "asm/processor.h" -@@ -19,6 +19,7 @@ - #include "asm/tlbflush.h" - #include "kern_util.h" - #include "user_util.h" -+#include "mem_user.h" - #include "os.h" - #include "helper.h" - -@@ -34,34 +35,66 @@ - EXPORT_SYMBOL(flush_tlb_range); - EXPORT_SYMBOL(host_task_size); - EXPORT_SYMBOL(arch_validate); -+EXPORT_SYMBOL(get_kmem_end); - --EXPORT_SYMBOL(region_pa); 
--EXPORT_SYMBOL(region_va); --EXPORT_SYMBOL(phys_mem_map); --EXPORT_SYMBOL(page_mem_map); - EXPORT_SYMBOL(page_to_phys); - EXPORT_SYMBOL(phys_to_page); - EXPORT_SYMBOL(high_physmem); - EXPORT_SYMBOL(empty_zero_page); - EXPORT_SYMBOL(um_virt_to_phys); -+EXPORT_SYMBOL(__virt_to_page); -+EXPORT_SYMBOL(to_phys); -+EXPORT_SYMBOL(to_virt); - EXPORT_SYMBOL(mode_tt); - EXPORT_SYMBOL(handle_page_fault); -+EXPORT_SYMBOL(find_iomem); - -+#ifdef CONFIG_MODE_TT -+EXPORT_SYMBOL(strncpy_from_user_tt); -+EXPORT_SYMBOL(copy_from_user_tt); -+EXPORT_SYMBOL(copy_to_user_tt); -+#endif -+ -+#ifdef CONFIG_MODE_SKAS -+EXPORT_SYMBOL(strncpy_from_user_skas); -+EXPORT_SYMBOL(copy_to_user_skas); -+EXPORT_SYMBOL(copy_from_user_skas); -+#endif -+ -+EXPORT_SYMBOL(os_stat_fd); -+EXPORT_SYMBOL(os_stat_file); -+EXPORT_SYMBOL(os_access); -+EXPORT_SYMBOL(os_print_error); -+EXPORT_SYMBOL(os_get_exec_close); -+EXPORT_SYMBOL(os_set_exec_close); - EXPORT_SYMBOL(os_getpid); - EXPORT_SYMBOL(os_open_file); - EXPORT_SYMBOL(os_read_file); - EXPORT_SYMBOL(os_write_file); - EXPORT_SYMBOL(os_seek_file); -+EXPORT_SYMBOL(os_lock_file); -+EXPORT_SYMBOL(os_ioctl_generic); - EXPORT_SYMBOL(os_pipe); - EXPORT_SYMBOL(os_file_type); -+EXPORT_SYMBOL(os_file_mode); -+EXPORT_SYMBOL(os_file_size); -+EXPORT_SYMBOL(os_flush_stdout); - EXPORT_SYMBOL(os_close_file); -+EXPORT_SYMBOL(os_set_fd_async); -+EXPORT_SYMBOL(os_set_fd_block); - EXPORT_SYMBOL(helper_wait); - EXPORT_SYMBOL(os_shutdown_socket); -+EXPORT_SYMBOL(os_create_unix_socket); - EXPORT_SYMBOL(os_connect_socket); -+EXPORT_SYMBOL(os_accept_connection); -+EXPORT_SYMBOL(os_rcv_fd); - EXPORT_SYMBOL(run_helper); - EXPORT_SYMBOL(start_thread); - EXPORT_SYMBOL(dump_thread); - -+EXPORT_SYMBOL(do_gettimeofday); -+EXPORT_SYMBOL(do_settimeofday); -+ - /* This is here because UML expands open to sys_open, not to a system - * call instruction. 
- */ -@@ -90,3 +123,13 @@ - EXPORT_SYMBOL(kmap_atomic_to_page); - #endif - -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -Index: uml-2.6.7/include/linux/gfp.h -=================================================================== ---- uml-2.6.7.orig/include/linux/gfp.h 2004-07-16 19:36:13.897033792 +0300 -+++ uml-2.6.7/include/linux/gfp.h 2004-07-16 19:47:23.805192272 +0300 -@@ -73,6 +73,11 @@ - * For the normal case of non-DISCONTIGMEM systems the NODE_DATA() gets - * optimized to &contig_page_data at compile-time. - */ -+ -+#ifndef HAVE_ARCH_FREE_PAGE -+static inline void arch_free_page(struct page *page, int order) { } -+#endif -+ - extern struct page * - FASTCALL(__alloc_pages(unsigned int, unsigned int, struct zonelist *)); - -Index: uml-2.6.7/arch/um/defconfig -=================================================================== ---- uml-2.6.7.orig/arch/um/defconfig 2004-07-16 19:37:02.601629568 +0300 -+++ uml-2.6.7/arch/um/defconfig 2004-07-16 19:47:23.676211880 +0300 -@@ -3,29 +3,19 @@ - # - CONFIG_USERMODE=y - CONFIG_MMU=y --CONFIG_SWAP=y - CONFIG_UID16=y - CONFIG_RWSEM_GENERIC_SPINLOCK=y --CONFIG_CONFIG_LOG_BUF_SHIFT=14 - - # --# Code maturity level options --# --CONFIG_EXPERIMENTAL=y -- --# --# General Setup -+# UML-specific options - # - CONFIG_MODE_TT=y - CONFIG_MODE_SKAS=y - CONFIG_NET=y --CONFIG_SYSVIPC=y --CONFIG_BSD_PROCESS_ACCT=y --CONFIG_SYSCTL=y --CONFIG_BINFMT_AOUT=y - CONFIG_BINFMT_ELF=y - CONFIG_BINFMT_MISC=y - CONFIG_HOSTFS=y -+CONFIG_HPPFS=y - CONFIG_MCONSOLE=y - CONFIG_MAGIC_SYSRQ=y - # CONFIG_HOST_2G_2G is not set -@@ -36,12 +26,43 @@ - # CONFIG_HIGHMEM is not set - CONFIG_PROC_MM=y - 
CONFIG_KERNEL_STACK_ORDER=2 -+CONFIG_UML_REAL_TIME_CLOCK=y -+ -+# -+# Code maturity level options -+# -+CONFIG_EXPERIMENTAL=y -+CONFIG_CLEAN_COMPILE=y -+CONFIG_STANDALONE=y -+CONFIG_BROKEN_ON_SMP=y -+ -+# -+# General setup -+# -+CONFIG_SWAP=y -+CONFIG_SYSVIPC=y -+CONFIG_BSD_PROCESS_ACCT=y -+CONFIG_SYSCTL=y -+CONFIG_LOG_BUF_SHIFT=14 -+# CONFIG_HOTPLUG is not set -+# CONFIG_IKCONFIG is not set -+# CONFIG_EMBEDDED is not set -+CONFIG_KALLSYMS=y -+CONFIG_FUTEX=y -+CONFIG_EPOLL=y -+CONFIG_IOSCHED_NOOP=y -+CONFIG_IOSCHED_AS=y -+CONFIG_IOSCHED_DEADLINE=y -+# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set - - # - # Loadable module support - # --CONFIG_MODULES=y --# CONFIG_KMOD is not set -+# CONFIG_MODULES is not set -+ -+# -+# Generic Driver Options -+# - - # - # Character Devices -@@ -69,6 +90,7 @@ - # - CONFIG_BLK_DEV_UBD=y - # CONFIG_BLK_DEV_UBD_SYNC is not set -+CONFIG_BLK_DEV_COW_COMMON=y - CONFIG_BLK_DEV_LOOP=y - CONFIG_BLK_DEV_NBD=y - CONFIG_BLK_DEV_RAM=y -@@ -78,7 +100,7 @@ - CONFIG_NETDEVICES=y - - # --# Network Devices -+# UML Network Devices - # - CONFIG_UML_NET=y - CONFIG_UML_NET_ETHERTAP=y -@@ -88,22 +110,6 @@ - CONFIG_UML_NET_MCAST=y - # CONFIG_UML_NET_PCAP is not set - CONFIG_UML_NET_SLIRP=y --CONFIG_DUMMY=y --# CONFIG_BONDING is not set --# CONFIG_EQUALIZER is not set --CONFIG_TUN=y --# CONFIG_ETHERTAP is not set --CONFIG_PPP=y --# CONFIG_PPP_MULTILINK is not set --# CONFIG_PPP_ASYNC is not set --# CONFIG_PPP_SYNC_TTY is not set --# CONFIG_PPP_DEFLATE is not set --# CONFIG_PPP_BSDCOMP is not set --# CONFIG_PPPOE is not set --CONFIG_SLIP=y --# CONFIG_SLIP_COMPRESSED is not set --# CONFIG_SLIP_SMART is not set --# CONFIG_SLIP_MODE_SLIP6 is not set - - # - # Networking support -@@ -115,8 +121,6 @@ - CONFIG_PACKET=y - CONFIG_PACKET_MMAP=y - # CONFIG_NETLINK_DEV is not set --# CONFIG_NETFILTER is not set --# CONFIG_FILTER is not set - CONFIG_UNIX=y - # CONFIG_NET_KEY is not set - CONFIG_INET=y -@@ -130,8 +134,11 @@ - # CONFIG_SYN_COOKIES is not set - # CONFIG_INET_AH 
is not set - # CONFIG_INET_ESP is not set --# CONFIG_XFRM_USER is not set -+# CONFIG_INET_IPCOMP is not set - # CONFIG_IPV6 is not set -+# CONFIG_DECNET is not set -+# CONFIG_BRIDGE is not set -+# CONFIG_NETFILTER is not set - - # - # SCTP Configuration (EXPERIMENTAL) -@@ -140,9 +147,9 @@ - # CONFIG_IP_SCTP is not set - # CONFIG_ATM is not set - # CONFIG_VLAN_8021Q is not set --# CONFIG_LLC is not set --# CONFIG_DECNET is not set --# CONFIG_BRIDGE is not set -+# CONFIG_LLC2 is not set -+# CONFIG_IPX is not set -+# CONFIG_ATALK is not set - # CONFIG_X25 is not set - # CONFIG_LAPB is not set - # CONFIG_NET_DIVERT is not set -@@ -160,6 +167,10 @@ - # Network testing - # - # CONFIG_NET_PKTGEN is not set -+CONFIG_DUMMY=y -+# CONFIG_BONDING is not set -+# CONFIG_EQUALIZER is not set -+CONFIG_TUN=y - - # - # Ethernet (10 or 100Mbit) -@@ -171,12 +182,28 @@ - # - - # -+# Ethernet (10000 Mbit) -+# -+CONFIG_PPP=y -+# CONFIG_PPP_MULTILINK is not set -+# CONFIG_PPP_FILTER is not set -+# CONFIG_PPP_ASYNC is not set -+# CONFIG_PPP_SYNC_TTY is not set -+# CONFIG_PPP_DEFLATE is not set -+# CONFIG_PPP_BSDCOMP is not set -+# CONFIG_PPPOE is not set -+CONFIG_SLIP=y -+# CONFIG_SLIP_COMPRESSED is not set -+# CONFIG_SLIP_SMART is not set -+# CONFIG_SLIP_MODE_SLIP6 is not set -+ -+# - # Wireless LAN (non-hamradio) - # - # CONFIG_NET_RADIO is not set - - # --# Token Ring devices (depends on LLC=y) -+# Token Ring devices - # - # CONFIG_SHAPER is not set - -@@ -186,68 +213,100 @@ - # CONFIG_WAN is not set - - # -+# Amateur Radio support -+# -+# CONFIG_HAMRADIO is not set -+ -+# -+# IrDA (infrared) support -+# -+# CONFIG_IRDA is not set -+ -+# -+# Bluetooth support -+# -+# CONFIG_BT is not set -+ -+# - # File systems - # -+CONFIG_EXT2_FS=y -+# CONFIG_EXT2_FS_XATTR is not set -+# CONFIG_EXT3_FS is not set -+# CONFIG_JBD is not set -+CONFIG_REISERFS_FS=y -+# CONFIG_REISERFS_CHECK is not set -+# CONFIG_REISERFS_PROC_INFO is not set -+# CONFIG_JFS_FS is not set -+# CONFIG_XFS_FS is not set 
-+CONFIG_MINIX_FS=y -+# CONFIG_ROMFS_FS is not set - CONFIG_QUOTA=y - # CONFIG_QFMT_V1 is not set - # CONFIG_QFMT_V2 is not set - CONFIG_QUOTACTL=y --CONFIG_AUTOFS_FS=m --CONFIG_AUTOFS4_FS=m --CONFIG_REISERFS_FS=m --# CONFIG_REISERFS_CHECK is not set --# CONFIG_REISERFS_PROC_INFO is not set -+CONFIG_AUTOFS_FS=y -+CONFIG_AUTOFS4_FS=y -+ -+# -+# CD-ROM/DVD Filesystems -+# -+CONFIG_ISO9660_FS=y -+# CONFIG_JOLIET is not set -+# CONFIG_ZISOFS is not set -+# CONFIG_UDF_FS is not set -+ -+# -+# DOS/FAT/NT Filesystems -+# -+CONFIG_FAT_FS=y -+CONFIG_MSDOS_FS=y -+CONFIG_VFAT_FS=y -+# CONFIG_NTFS_FS is not set -+ -+# -+# Pseudo filesystems -+# -+CONFIG_PROC_FS=y -+CONFIG_PROC_KCORE=y -+CONFIG_DEVFS_FS=y -+CONFIG_DEVFS_MOUNT=y -+# CONFIG_DEVFS_DEBUG is not set -+# CONFIG_DEVPTS_FS_XATTR is not set -+CONFIG_TMPFS=y -+# CONFIG_HUGETLB_PAGE is not set -+CONFIG_RAMFS=y -+ -+# -+# Miscellaneous filesystems -+# - # CONFIG_ADFS_FS is not set - # CONFIG_AFFS_FS is not set - # CONFIG_HFS_FS is not set -+# CONFIG_HFSPLUS_FS is not set - # CONFIG_BEFS_FS is not set - # CONFIG_BFS_FS is not set --# CONFIG_EXT3_FS is not set --# CONFIG_JBD is not set --CONFIG_FAT_FS=m --CONFIG_MSDOS_FS=m --CONFIG_VFAT_FS=m - # CONFIG_EFS_FS is not set - CONFIG_JFFS_FS=y - CONFIG_JFFS_FS_VERBOSE=0 --CONFIG_JFFS_PROC_FS=y - # CONFIG_JFFS2_FS is not set - # CONFIG_CRAMFS is not set --# CONFIG_TMPFS is not set --CONFIG_RAMFS=y --CONFIG_ISO9660_FS=m --# CONFIG_JOLIET is not set --# CONFIG_ZISOFS is not set --# CONFIG_JFS_FS is not set --CONFIG_MINIX_FS=m - # CONFIG_VXFS_FS is not set --# CONFIG_NTFS_FS is not set - # CONFIG_HPFS_FS is not set --CONFIG_PROC_FS=y --CONFIG_DEVFS_FS=y --CONFIG_DEVFS_MOUNT=y --# CONFIG_DEVFS_DEBUG is not set --CONFIG_DEVPTS_FS=y - # CONFIG_QNX4FS_FS is not set --# CONFIG_ROMFS_FS is not set --CONFIG_EXT2_FS=y --# CONFIG_EXT2_FS_XATTR is not set - # CONFIG_SYSV_FS is not set --# CONFIG_UDF_FS is not set - # CONFIG_UFS_FS is not set --# CONFIG_XFS_FS is not set - - # - # Network File 
Systems - # --# CONFIG_CODA_FS is not set --# CONFIG_INTERMEZZO_FS is not set - # CONFIG_NFS_FS is not set - # CONFIG_NFSD is not set - # CONFIG_EXPORTFS is not set --# CONFIG_CIFS is not set - # CONFIG_SMB_FS is not set -+# CONFIG_CIFS is not set - # CONFIG_NCP_FS is not set -+# CONFIG_CODA_FS is not set - # CONFIG_AFS_FS is not set - - # -@@ -255,11 +314,11 @@ - # - # CONFIG_PARTITION_ADVANCED is not set - CONFIG_MSDOS_PARTITION=y --CONFIG_NLS=y - - # - # Native Language Support - # -+CONFIG_NLS=y - CONFIG_NLS_DEFAULT="iso8859-1" - # CONFIG_NLS_CODEPAGE_437 is not set - # CONFIG_NLS_CODEPAGE_737 is not set -@@ -317,28 +376,7 @@ - # - # SCSI support - # --CONFIG_SCSI=y --CONFIG_GENERIC_ISA_DMA=y -- --# --# SCSI support type (disk, tape, CD-ROM) --# --CONFIG_BLK_DEV_SD=y --CONFIG_SD_EXTRA_DEVS=40 --CONFIG_CHR_DEV_ST=y --CONFIG_BLK_DEV_SR=y --CONFIG_BLK_DEV_SR_VENDOR=y --CONFIG_SR_EXTRA_DEVS=2 --CONFIG_CHR_DEV_SG=y -- --# --# Some SCSI devices (e.g. CD jukebox) support multiple LUNs --# --CONFIG_SCSI_DEBUG_QUEUES=y --CONFIG_SCSI_MULTI_LUN=y --CONFIG_SCSI_CONSTANTS=y --CONFIG_SCSI_LOGGING=y --CONFIG_SCSI_DEBUG=y -+# CONFIG_SCSI is not set - - # - # Multi-device support (RAID and LVM) -@@ -360,6 +398,7 @@ - CONFIG_MTD_BLOCK=y - # CONFIG_FTL is not set - # CONFIG_NFTL is not set -+# CONFIG_INFTL is not set - - # - # RAM/ROM/Flash chip drivers -@@ -374,20 +413,21 @@ - # - # Mapping drivers for chip access - # -+# CONFIG_MTD_COMPLEX_MAPPINGS is not set - - # - # Self-contained MTD device drivers - # - # CONFIG_MTD_SLRAM is not set - # CONFIG_MTD_MTDRAM is not set --CONFIG_MTD_BLKMTD=m -+CONFIG_MTD_BLKMTD=y - - # - # Disk-On-Chip Device Drivers - # --# CONFIG_MTD_DOC1000 is not set - # CONFIG_MTD_DOC2000 is not set - # CONFIG_MTD_DOC2001 is not set -+# CONFIG_MTD_DOC2001PLUS is not set - - # - # NAND Flash Device Drivers -Index: uml-2.6.7/arch/um/kernel/physmem.c -=================================================================== ---- 
uml-2.6.7.orig/arch/um/kernel/physmem.c 2004-07-16 19:47:23.617220848 +0300 -+++ uml-2.6.7/arch/um/kernel/physmem.c 2004-07-16 19:47:23.722204888 +0300 -@@ -0,0 +1,468 @@ -+/* -+ * Copyright (C) 2000 - 2003 Jeff Dike (jdike@addtoit.com) -+ * Licensed under the GPL -+ */ -+ -+#include "linux/mm.h" -+#include "linux/ghash.h" -+#include "linux/slab.h" -+#include "linux/vmalloc.h" -+#include "linux/bootmem.h" -+#include "asm/types.h" -+#include "asm/pgtable.h" -+#include "kern_util.h" -+#include "user_util.h" -+#include "mode_kern.h" -+#include "mem.h" -+#include "mem_user.h" -+#include "os.h" -+#include "kern.h" -+#include "init.h" -+ -+#if 0 -+static pgd_t physmem_pgd[PTRS_PER_PGD]; -+ -+static struct phys_desc *lookup_mapping(void *addr) -+{ -+ pgd = &physmem_pgd[pgd_index(addr)]; -+ if(pgd_none(pgd)) -+ return(NULL); -+ -+ pmd = pmd_offset(pgd, addr); -+ if(pmd_none(pmd)) -+ return(NULL); -+ -+ pte = pte_offset_kernel(pmd, addr); -+ return((struct phys_desc *) pte_val(pte)); -+} -+ -+static struct add_mapping(void *addr, struct phys_desc *new) -+{ -+} -+#endif -+ -+#define PHYS_HASHSIZE (8192) -+ -+struct phys_desc; -+ -+DEF_HASH_STRUCTS(virtmem, PHYS_HASHSIZE, struct phys_desc); -+ -+struct phys_desc { -+ struct virtmem_ptrs virt_ptrs; -+ int fd; -+ __u64 offset; -+ void *virt; -+ unsigned long phys; -+ struct list_head list; -+}; -+ -+struct virtmem_table virtmem_hash; -+ -+static int virt_cmp(void *virt1, void *virt2) -+{ -+ return(virt1 != virt2); -+} -+ -+static int virt_hash(void *virt) -+{ -+ unsigned long addr = ((unsigned long) virt) >> PAGE_SHIFT; -+ return(addr % PHYS_HASHSIZE); -+} -+ -+DEF_HASH(static, virtmem, struct phys_desc, virt_ptrs, void *, virt, virt_cmp, -+ virt_hash); -+ -+LIST_HEAD(descriptor_mappings); -+ -+struct desc_mapping { -+ int fd; -+ struct list_head list; -+ struct list_head pages; -+}; -+ -+static struct desc_mapping *find_mapping(int fd) -+{ -+ struct desc_mapping *desc; -+ struct list_head *ele; -+ -+ list_for_each(ele, 
&descriptor_mappings){ -+ desc = list_entry(ele, struct desc_mapping, list); -+ if(desc->fd == fd) -+ return(desc); -+ } -+ -+ return(NULL); -+} -+ -+static struct desc_mapping *descriptor_mapping(int fd) -+{ -+ struct desc_mapping *desc; -+ -+ desc = find_mapping(fd); -+ if(desc != NULL) -+ return(desc); -+ -+ desc = kmalloc(sizeof(*desc), GFP_ATOMIC); -+ if(desc == NULL) -+ return(NULL); -+ -+ *desc = ((struct desc_mapping) -+ { .fd = fd, -+ .list = LIST_HEAD_INIT(desc->list), -+ .pages = LIST_HEAD_INIT(desc->pages) }); -+ list_add(&desc->list, &descriptor_mappings); -+ -+ return(desc); -+} -+ -+int physmem_subst_mapping(void *virt, int fd, __u64 offset, int w) -+{ -+ struct desc_mapping *fd_maps; -+ struct phys_desc *desc; -+ unsigned long phys; -+ int err; -+ -+ fd_maps = descriptor_mapping(fd); -+ if(fd_maps == NULL) -+ return(-ENOMEM); -+ -+ phys = __pa(virt); -+ if(find_virtmem_hash(&virtmem_hash, virt) != NULL) -+ panic("Address 0x%p is already substituted\n", virt); -+ -+ err = -ENOMEM; -+ desc = kmalloc(sizeof(*desc), GFP_ATOMIC); -+ if(desc == NULL) -+ goto out; -+ -+ *desc = ((struct phys_desc) -+ { .virt_ptrs = { NULL, NULL }, -+ .fd = fd, -+ .offset = offset, -+ .virt = virt, -+ .phys = __pa(virt), -+ .list = LIST_HEAD_INIT(desc->list) }); -+ insert_virtmem_hash(&virtmem_hash, desc); -+ -+ list_add(&desc->list, &fd_maps->pages); -+ -+ virt = (void *) ((unsigned long) virt & PAGE_MASK); -+ err = os_map_memory(virt, fd, offset, PAGE_SIZE, 1, w, 0); -+ if(!err) -+ goto out; -+ -+ remove_virtmem_hash(&virtmem_hash, desc); -+ kfree(desc); -+ out: -+ return(err); -+} -+ -+static int physmem_fd = -1; -+ -+static void remove_mapping(struct phys_desc *desc) -+{ -+ void *virt = desc->virt; -+ int err; -+ -+ remove_virtmem_hash(&virtmem_hash, desc); -+ list_del(&desc->list); -+ kfree(desc); -+ -+ err = os_map_memory(virt, physmem_fd, __pa(virt), PAGE_SIZE, 1, 1, 0); -+ if(err) -+ panic("Failed to unmap block device page from physical memory, " -+ "errno = %d", 
-err); -+} -+ -+int physmem_remove_mapping(void *virt) -+{ -+ struct phys_desc *desc; -+ -+ virt = (void *) ((unsigned long) virt & PAGE_MASK); -+ desc = find_virtmem_hash(&virtmem_hash, virt); -+ if(desc == NULL) -+ return(0); -+ -+ remove_mapping(desc); -+ return(1); -+} -+ -+void physmem_forget_descriptor(int fd) -+{ -+ struct desc_mapping *desc; -+ struct phys_desc *page; -+ struct list_head *ele, *next; -+ __u64 offset; -+ void *addr; -+ int err; -+ -+ desc = find_mapping(fd); -+ if(desc == NULL) -+ return; -+ -+ list_for_each_safe(ele, next, &desc->pages){ -+ page = list_entry(ele, struct phys_desc, list); -+ offset = page->offset; -+ addr = page->virt; -+ remove_mapping(page); -+ err = os_seek_file(fd, offset); -+ if(err) -+ panic("physmem_forget_descriptor - failed to seek " -+ "to %lld in fd %d, error = %d\n", -+ offset, fd, -err); -+ err = os_read_file(fd, addr, PAGE_SIZE); -+ if(err < 0) -+ panic("physmem_forget_descriptor - failed to read " -+ "from fd %d to 0x%p, error = %d\n", -+ fd, addr, -err); -+ } -+ -+ list_del(&desc->list); -+ kfree(desc); -+} -+ -+void arch_free_page(struct page *page, int order) -+{ -+ void *virt; -+ int i; -+ -+ for(i = 0; i < (1 << order); i++){ -+ virt = __va(page_to_phys(page + i)); -+ physmem_remove_mapping(virt); -+ } -+} -+ -+int is_remapped(void *virt) -+{ -+ return(find_virtmem_hash(&virtmem_hash, virt) != NULL); -+} -+ -+/* Changed during early boot */ -+unsigned long high_physmem; -+ -+extern unsigned long physmem_size; -+ -+void *to_virt(unsigned long phys) -+{ -+ return((void *) uml_physmem + phys); -+} -+ -+unsigned long to_phys(void *virt) -+{ -+ return(((unsigned long) virt) - uml_physmem); -+} -+ -+int init_maps(unsigned long physmem, unsigned long iomem, unsigned long highmem) -+{ -+ struct page *p, *map; -+ unsigned long phys_len, phys_pages, highmem_len, highmem_pages; -+ unsigned long iomem_len, iomem_pages, total_len, total_pages; -+ int i; -+ -+ phys_pages = physmem >> PAGE_SHIFT; -+ phys_len = 
phys_pages * sizeof(struct page); -+ -+ iomem_pages = iomem >> PAGE_SHIFT; -+ iomem_len = iomem_pages * sizeof(struct page); -+ -+ highmem_pages = highmem >> PAGE_SHIFT; -+ highmem_len = highmem_pages * sizeof(struct page); -+ -+ total_pages = phys_pages + iomem_pages + highmem_pages; -+ total_len = phys_len + iomem_pages + highmem_len; -+ -+ if(kmalloc_ok){ -+ map = kmalloc(total_len, GFP_KERNEL); -+ if(map == NULL) -+ map = vmalloc(total_len); -+ } -+ else map = alloc_bootmem_low_pages(total_len); -+ -+ if(map == NULL) -+ return(-ENOMEM); -+ -+ for(i = 0; i < total_pages; i++){ -+ p = &map[i]; -+ set_page_count(p, 0); -+ SetPageReserved(p); -+ INIT_LIST_HEAD(&p->lru); -+ } -+ -+ mem_map = map; -+ max_mapnr = total_pages; -+ return(0); -+} -+ -+struct page *phys_to_page(const unsigned long phys) -+{ -+ return(&mem_map[phys >> PAGE_SHIFT]); -+} -+ -+struct page *__virt_to_page(const unsigned long virt) -+{ -+ return(&mem_map[__pa(virt) >> PAGE_SHIFT]); -+} -+ -+unsigned long page_to_phys(struct page *page) -+{ -+ return((page - mem_map) << PAGE_SHIFT); -+} -+ -+pte_t mk_pte(struct page *page, pgprot_t pgprot) -+{ -+ pte_t pte; -+ -+ pte_val(pte) = page_to_phys(page) + pgprot_val(pgprot); -+ if(pte_present(pte)) pte_mknewprot(pte_mknewpage(pte)); -+ return(pte); -+} -+ -+/* Changed during early boot */ -+static unsigned long kmem_top = 0; -+ -+unsigned long get_kmem_end(void) -+{ -+ if(kmem_top == 0) -+ kmem_top = CHOOSE_MODE(kmem_end_tt, kmem_end_skas); -+ return(kmem_top); -+} -+ -+void map_memory(unsigned long virt, unsigned long phys, unsigned long len, -+ int r, int w, int x) -+{ -+ __u64 offset; -+ int fd, err; -+ -+ fd = phys_mapping(phys, &offset); -+ err = os_map_memory((void *) virt, fd, offset, len, r, w, x); -+ if(err) -+ panic("map_memory(0x%lx, %d, 0x%llx, %ld, %d, %d, %d) failed, " -+ "err = %d\n", virt, fd, offset, len, r, w, x, err); -+} -+ -+#define PFN_UP(x) (((x) + PAGE_SIZE-1) >> PAGE_SHIFT) -+ -+void setup_physmem(unsigned long start, unsigned 
long reserve_end, -+ unsigned long len, unsigned long highmem) -+{ -+ unsigned long reserve = reserve_end - start; -+ int pfn = PFN_UP(__pa(reserve_end)); -+ int delta = (len - reserve) >> PAGE_SHIFT; -+ int err, offset, bootmap_size; -+ -+ physmem_fd = create_mem_file(len + highmem); -+ -+ offset = uml_reserved - uml_physmem; -+ err = os_map_memory((void *) uml_reserved, physmem_fd, offset, -+ len - offset, 1, 1, 0); -+ if(err < 0){ -+ os_print_error(err, "Mapping memory"); -+ exit(1); -+ } -+ -+ bootmap_size = init_bootmem(pfn, pfn + delta); -+ free_bootmem(__pa(reserve_end) + bootmap_size, -+ len - bootmap_size - reserve); -+} -+ -+int phys_mapping(unsigned long phys, __u64 *offset_out) -+{ -+ struct phys_desc *desc = find_virtmem_hash(&virtmem_hash, -+ __va(phys & PAGE_MASK)); -+ int fd = -1; -+ -+ if(desc != NULL){ -+ fd = desc->fd; -+ *offset_out = desc->offset; -+ } -+ else if(phys < physmem_size){ -+ fd = physmem_fd; -+ *offset_out = phys; -+ } -+ else if(phys < __pa(end_iomem)){ -+ struct iomem_region *region = iomem_regions; -+ -+ while(region != NULL){ -+ if((phys >= region->phys) && -+ (phys < region->phys + region->size)){ -+ fd = region->fd; -+ *offset_out = phys - region->phys; -+ break; -+ } -+ region = region->next; -+ } -+ } -+ else if(phys < __pa(end_iomem) + highmem){ -+ fd = physmem_fd; -+ *offset_out = phys - iomem_size; -+ } -+ -+ return(fd); -+} -+ -+static int __init uml_mem_setup(char *line, int *add) -+{ -+ char *retptr; -+ physmem_size = memparse(line,&retptr); -+ return 0; -+} -+__uml_setup("mem=", uml_mem_setup, -+"mem=\n" -+" This controls how much \"physical\" memory the kernel allocates\n" -+" for the system. The size is specified as a number followed by\n" -+" one of 'k', 'K', 'm', 'M', which have the obvious meanings.\n" -+" This is not related to the amount of memory in the host. 
It can\n" -+" be more, and the excess, if it's ever used, will just be swapped out.\n" -+" Example: mem=64M\n\n" -+); -+ -+unsigned long find_iomem(char *driver, unsigned long *len_out) -+{ -+ struct iomem_region *region = iomem_regions; -+ -+ while(region != NULL){ -+ if(!strcmp(region->driver, driver)){ -+ *len_out = region->size; -+ return(region->virt); -+ } -+ } -+ -+ return(0); -+} -+ -+int setup_iomem(void) -+{ -+ struct iomem_region *region = iomem_regions; -+ unsigned long iomem_start = high_physmem + PAGE_SIZE; -+ int err; -+ -+ while(region != NULL){ -+ err = os_map_memory((void *) iomem_start, region->fd, 0, -+ region->size, 1, 1, 0); -+ if(err) -+ printk("Mapping iomem region for driver '%s' failed, " -+ "errno = %d\n", region->driver, -err); -+ else { -+ region->virt = iomem_start; -+ region->phys = __pa(region->virt); -+ } -+ -+ iomem_start += region->size + PAGE_SIZE; -+ region = region->next; -+ } -+ -+ return(0); -+} -+ -+__initcall(setup_iomem); -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. 
-+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -Index: uml-2.6.7/arch/um/kernel/irq_user.c -=================================================================== ---- uml-2.6.7.orig/arch/um/kernel/irq_user.c 2004-07-16 19:36:33.534048512 +0300 -+++ uml-2.6.7/arch/um/kernel/irq_user.c 2004-07-16 19:47:23.717205648 +0300 -@@ -6,7 +6,6 @@ - #include - #include - #include --#include - #include - #include - #include -@@ -49,7 +48,8 @@ - - if(smp_sigio_handler()) return; - while(1){ -- if((n = poll(pollfds, pollfds_num, 0)) < 0){ -+ n = poll(pollfds, pollfds_num, 0); -+ if(n < 0){ - if(errno == EINTR) continue; - printk("sigio_handler : poll returned %d, " - "errno = %d\n", n, errno); -@@ -366,34 +366,31 @@ - - void forward_ipi(int fd, int pid) - { -- if(fcntl(fd, F_SETOWN, pid) < 0){ -- int save_errno = errno; -- if(fcntl(fd, F_GETOWN, 0) != pid){ -- printk("forward_ipi: F_SETOWN failed, fd = %d, " -- "me = %d, target = %d, errno = %d\n", fd, -- os_getpid(), pid, save_errno); -- } -- } -+ int err; -+ -+ err = os_set_owner(fd, pid); -+ if(err < 0) -+ printk("forward_ipi: set_owner failed, fd = %d, me = %d, " -+ "target = %d, err = %d\n", fd, os_getpid(), pid, -err); - } - - void forward_interrupts(int pid) - { - struct irq_fd *irq; - unsigned long flags; -+ int err; - - flags = irq_lock(); - for(irq=active_fds;irq != NULL;irq = irq->next){ -- if(fcntl(irq->fd, F_SETOWN, pid) < 0){ -- int save_errno = errno; -- if(fcntl(irq->fd, F_GETOWN, 0) != pid){ -- /* XXX Just remove the irq rather than -- * print out an infinite stream of these -- */ -- printk("Failed to forward %d to pid %d, " -- "errno = %d\n", irq->fd, pid, -- save_errno); -- } -+ err = os_set_owner(irq->fd, pid); -+ if(err < 0){ -+ /* XXX Just remove the irq rather than -+ * print out an infinite stream of these -+ */ -+ printk("Failed to forward %d to pid %d, err = %d\n", -+ irq->fd, pid, -err); - } -+ - irq->pid = 
pid; - } - irq_unlock(flags); -Index: uml-2.6.7/arch/um/include/line.h -=================================================================== ---- uml-2.6.7.orig/arch/um/include/line.h 2004-07-16 19:37:13.937906192 +0300 -+++ uml-2.6.7/arch/um/include/line.h 2004-07-16 19:47:23.702207928 +0300 -@@ -9,12 +9,14 @@ - #include "linux/list.h" - #include "linux/workqueue.h" - #include "linux/tty.h" -+#include "linux/interrupt.h" - #include "asm/semaphore.h" - #include "chan_user.h" - #include "mconsole_kern.h" - - struct line_driver { - char *name; -+ char *device_name; - char *devfs_name; - short major; - short minor_start; -@@ -67,8 +69,6 @@ - - #define LINES_INIT(n) { num : n } - --extern void line_interrupt(int irq, void *data, struct pt_regs *unused); --extern void line_write_interrupt(int irq, void *data, struct pt_regs *unused); - extern void line_close(struct line *lines, struct tty_struct *tty); - extern int line_open(struct line *lines, struct tty_struct *tty, - struct chan_opts *opts); -Index: uml-2.6.7/arch/um/sys-i386/fault.c -=================================================================== ---- uml-2.6.7.orig/arch/um/sys-i386/fault.c 2004-07-16 19:36:42.987611352 +0300 -+++ uml-2.6.7/arch/um/sys-i386/fault.c 2004-07-16 19:47:23.771197440 +0300 -@@ -1,5 +1,5 @@ - /* -- * Copyright (C) 2002 Jeff Dike (jdike@karaya.com) -+ * Copyright (C) 2002 - 2004 Jeff Dike (jdike@addtoit.com) - * Licensed under the GPL - */ - -@@ -7,16 +7,24 @@ - #include "sysdep/ptrace.h" - #include "sysdep/sigcontext.h" - --extern unsigned long search_exception_table(unsigned long addr); -+/* These two are from asm-um/uaccess.h and linux/module.h, check them. 
*/ -+struct exception_table_entry -+{ -+ unsigned long insn; -+ unsigned long fixup; -+}; - -+const struct exception_table_entry *search_exception_tables(unsigned long add); -+ -+/* Compare this to arch/i386/mm/extable.c:fixup_exception() */ - int arch_fixup(unsigned long address, void *sc_ptr) - { - struct sigcontext *sc = sc_ptr; -- unsigned long fixup; -+ const struct exception_table_entry *fixup; - - fixup = search_exception_tables(address); - if(fixup != 0){ -- sc->eip = fixup; -+ sc->eip = fixup->fixup; - return(1); - } - return(0); -Index: uml-2.6.7/arch/um/kernel/smp.c -=================================================================== ---- uml-2.6.7.orig/arch/um/kernel/smp.c 2004-07-16 19:36:13.630074376 +0300 -+++ uml-2.6.7/arch/um/kernel/smp.c 2004-07-16 19:47:23.736202760 +0300 -@@ -1,9 +1,15 @@ - /* -- * Copyright (C) 2000 Jeff Dike (jdike@karaya.com) -+ * Copyright (C) 2000 - 2003 Jeff Dike (jdike@addtoit.com) - * Licensed under the GPL - */ - - #include "linux/config.h" -+#include "linux/percpu.h" -+#include "asm/pgalloc.h" -+#include "asm/tlb.h" -+ -+/* For some reason, mmu_gathers are referenced when CONFIG_SMP is off. 
*/ -+DEFINE_PER_CPU(struct mmu_gather, mmu_gathers); - - #ifdef CONFIG_SMP - -@@ -23,7 +29,7 @@ - #include "os.h" - - /* CPU online map, set by smp_boot_cpus */ --unsigned long cpu_online_map = cpumask_of_cpu(0); -+unsigned long cpu_online_map = CPU_MASK_NONE; - - EXPORT_SYMBOL(cpu_online_map); - -@@ -55,7 +61,7 @@ - - void smp_send_reschedule(int cpu) - { -- write(cpu_data[cpu].ipi_pipe[1], "R", 1); -+ os_write_file(cpu_data[cpu].ipi_pipe[1], "R", 1); - num_reschedules_sent++; - } - -@@ -100,35 +106,34 @@ - - printk(KERN_INFO "Stopping all CPUs..."); - for(i = 0; i < num_online_cpus(); i++){ -- if(i == current->thread_info->cpu) -+ if(i == current_thread->cpu) - continue; -- write(cpu_data[i].ipi_pipe[1], "S", 1); -+ os_write_file(cpu_data[i].ipi_pipe[1], "S", 1); - } - printk("done\n"); - } - --static cpumask_t smp_commenced_mask; --static cpumask_t smp_callin_map = CPU_MASK_NONE; -+static cpumask_t smp_commenced_mask = CPU_MASK_NONE; -+static cpumask_t cpu_callin_map = CPU_MASK_NONE; - - static int idle_proc(void *cpup) - { - int cpu = (int) cpup, err; - - err = os_pipe(cpu_data[cpu].ipi_pipe, 1, 1); -- if(err) -- panic("CPU#%d failed to create IPI pipe, errno = %d", cpu, -- -err); -+ if(err < 0) -+ panic("CPU#%d failed to create IPI pipe, err = %d", cpu, -err); - - activate_ipi(cpu_data[cpu].ipi_pipe[0], - current->thread.mode.tt.extern_pid); - - wmb(); -- if (cpu_test_and_set(cpu, &smp_callin_map)) { -+ if (cpu_test_and_set(cpu, cpu_callin_map)) { - printk("huh, CPU#%d already present??\n", cpu); - BUG(); - } - -- while (!cpu_isset(cpu, &smp_commenced_mask)) -+ while (!cpu_isset(cpu, smp_commenced_mask)) - cpu_relax(); - - cpu_set(cpu, cpu_online_map); -@@ -143,16 +148,20 @@ - - current->thread.request.u.thread.proc = idle_proc; - current->thread.request.u.thread.arg = (void *) cpu; -- new_task = do_fork(CLONE_VM | CLONE_IDLETASK, 0, NULL, 0, NULL, NULL); -- if(IS_ERR(new_task)) panic("do_fork failed in idle_thread"); -+ new_task = copy_process(CLONE_VM | 
CLONE_IDLETASK, 0, NULL, 0, NULL, -+ NULL); -+ if(IS_ERR(new_task)) -+ panic("copy_process failed in idle_thread, error = %ld", -+ PTR_ERR(new_task)); - - cpu_tasks[cpu] = ((struct cpu_task) - { .pid = new_task->thread.mode.tt.extern_pid, - .task = new_task } ); - idle_threads[cpu] = new_task; -- CHOOSE_MODE(write(new_task->thread.mode.tt.switch_pipe[1], &c, -+ CHOOSE_MODE(os_write_file(new_task->thread.mode.tt.switch_pipe[1], &c, - sizeof(c)), - ({ panic("skas mode doesn't support SMP"); })); -+ wake_up_forked_process(new_task); - return(new_task); - } - -@@ -160,15 +169,17 @@ - { - struct task_struct *idle; - unsigned long waittime; -- int err, cpu; -+ int err, cpu, me = smp_processor_id(); - -- cpu_set(0, cpu_online_map); -- cpu_set(0, smp_callin_map); -+ cpu_clear(me, cpu_online_map); -+ cpu_set(me, cpu_online_map); -+ cpu_set(me, cpu_callin_map); - -- err = os_pipe(cpu_data[0].ipi_pipe, 1, 1); -- if(err) panic("CPU#0 failed to create IPI pipe, errno = %d", -err); -+ err = os_pipe(cpu_data[me].ipi_pipe, 1, 1); -+ if(err < 0) -+ panic("CPU#0 failed to create IPI pipe, errno = %d", -err); - -- activate_ipi(cpu_data[0].ipi_pipe[0], -+ activate_ipi(cpu_data[me].ipi_pipe[0], - current->thread.mode.tt.extern_pid); - - for(cpu = 1; cpu < ncpus; cpu++){ -@@ -180,10 +191,10 @@ - unhash_process(idle); - - waittime = 200000000; -- while (waittime-- && !cpu_isset(cpu, smp_callin_map)) -+ while (waittime-- && !cpu_isset(cpu, cpu_callin_map)) - cpu_relax(); - -- if (cpu_isset(cpu, smp_callin_map)) -+ if (cpu_isset(cpu, cpu_callin_map)) - printk("done\n"); - else printk("failed\n"); - } -@@ -216,7 +227,7 @@ - int fd; - - fd = cpu_data[cpu].ipi_pipe[0]; -- while (read(fd, &c, 1) == 1) { -+ while (os_read_file(fd, &c, 1) == 1) { - switch (c) { - case 'C': - smp_call_function_slave(cpu); -@@ -276,9 +287,9 @@ - info = _info; - - for (i=0;ithread_info->cpu) && -+ if((i != current_thread->cpu) && - cpu_isset(i, cpu_online_map)) -- write(cpu_data[i].ipi_pipe[1], "C", 1); -+ 
os_write_file(cpu_data[i].ipi_pipe[1], "C", 1); - - while (atomic_read(&scf_started) != cpus) - barrier(); -Index: uml-2.6.7/arch/um/drivers/slip_user.c -=================================================================== ---- uml-2.6.7.orig/arch/um/drivers/slip_user.c 2004-07-16 19:37:26.082060000 +0300 -+++ uml-2.6.7/arch/um/drivers/slip_user.c 2004-07-16 19:47:24.797041488 +0300 -@@ -4,11 +4,9 @@ - #include - #include - #include --#include --#include -+#include - #include - #include --#include - #include - #include "user_util.h" - #include "kern_util.h" -@@ -65,9 +63,9 @@ - { - struct slip_pre_exec_data *data = arg; - -- if(data->stdin != -1) dup2(data->stdin, 0); -+ if(data->stdin >= 0) dup2(data->stdin, 0); - dup2(data->stdout, 1); -- if(data->close_me != -1) close(data->close_me); -+ if(data->close_me >= 0) os_close_file(data->close_me); - } - - static int slip_tramp(char **argv, int fd) -@@ -77,8 +75,8 @@ - int status, pid, fds[2], err, output_len; - - err = os_pipe(fds, 1, 0); -- if(err){ -- printk("slip_tramp : pipe failed, errno = %d\n", -err); -+ if(err < 0){ -+ printk("slip_tramp : pipe failed, err = %d\n", -err); - return(err); - } - -@@ -96,16 +94,18 @@ - printk("slip_tramp : failed to allocate output " - "buffer\n"); - -- close(fds[1]); -+ os_close_file(fds[1]); - read_output(fds[0], output, output_len); - if(output != NULL){ - printk("%s", output); - kfree(output); - } -- if(waitpid(pid, &status, 0) < 0) err = errno; -+ CATCH_EINTR(err = waitpid(pid, &status, 0)); -+ if(err < 0) -+ err = errno; - else if(!WIFEXITED(status) || (WEXITSTATUS(status) != 0)){ - printk("'%s' didn't exit with status 0\n", argv[0]); -- err = EINVAL; -+ err = -EINVAL; - } - } - return(err); -@@ -118,15 +118,17 @@ - char gate_buf[sizeof("nnn.nnn.nnn.nnn\0")]; - char *argv[] = { "uml_net", version_buf, "slip", "up", gate_buf, - NULL }; -- int sfd, mfd, disc, sencap, err; -+ int sfd, mfd, err; - -- if((mfd = get_pty()) < 0){ -- printk("umn : Failed to open pty\n"); -- 
return(-1); -+ mfd = get_pty(); -+ if(mfd < 0){ -+ printk("umn : Failed to open pty, err = %d\n", -mfd); -+ return(mfd); - } -- if((sfd = os_open_file(ptsname(mfd), of_rdwr(OPENFLAGS()), 0)) < 0){ -- printk("Couldn't open tty for slip line\n"); -- return(-1); -+ sfd = os_open_file(ptsname(mfd), of_rdwr(OPENFLAGS()), 0); -+ if(sfd < 0){ -+ printk("Couldn't open tty for slip line, err = %d\n", -sfd); -+ return(sfd); - } - if(set_up_tty(sfd)) return(-1); - pri->slave = sfd; -@@ -138,28 +140,23 @@ - - err = slip_tramp(argv, sfd); - -- if(err != 0){ -- printk("slip_tramp failed - errno = %d\n", err); -- return(-err); -+ if(err < 0){ -+ printk("slip_tramp failed - err = %d\n", -err); -+ return(err); - } -- if(ioctl(pri->slave, SIOCGIFNAME, pri->name) < 0){ -- printk("SIOCGIFNAME failed, errno = %d\n", errno); -- return(-errno); -+ err = os_get_ifname(pri->slave, pri->name); -+ if(err < 0){ -+ printk("get_ifname failed, err = %d\n", -err); -+ return(err); - } - iter_addresses(pri->dev, open_addr, pri->name); - } - else { -- disc = N_SLIP; -- if(ioctl(sfd, TIOCSETD, &disc) < 0){ -- printk("Failed to set slip line discipline - " -- "errno = %d\n", errno); -- return(-errno); -- } -- sencap = 0; -- if(ioctl(sfd, SIOCSIFENCAP, &sencap) < 0){ -- printk("Failed to set slip encapsulation - " -- "errno = %d\n", errno); -- return(-errno); -+ err = os_set_slip(sfd); -+ if(err < 0){ -+ printk("Failed to set slip discipline encapsulation - " -+ "err = %d\n", -err); -+ return(err); - } - } - return(mfd); -@@ -181,9 +178,9 @@ - err = slip_tramp(argv, -1); - - if(err != 0) -- printk("slip_tramp failed - errno = %d\n", err); -- close(fd); -- close(pri->slave); -+ printk("slip_tramp failed - errno = %d\n", -err); -+ os_close_file(fd); -+ os_close_file(pri->slave); - pri->slave = -1; - } - -@@ -243,7 +240,7 @@ - { - struct slip_data *pri = data; - -- if(pri->slave == -1) return; -+ if(pri->slave < 0) return; - open_addr(addr, netmask, pri->name); - } - -@@ -252,7 +249,7 @@ - { - struct 
slip_data *pri = data; - -- if(pri->slave == -1) return; -+ if(pri->slave < 0) return; - close_addr(addr, netmask, pri->name); - } - -Index: uml-2.6.7/fs/hostfs/hostfs.h -=================================================================== ---- uml-2.6.7.orig/fs/hostfs/hostfs.h 2004-07-16 19:47:23.631218720 +0300 -+++ uml-2.6.7/fs/hostfs/hostfs.h 2004-07-16 19:47:23.781195920 +0300 -@@ -0,0 +1,79 @@ -+#ifndef __UM_FS_HOSTFS -+#define __UM_FS_HOSTFS -+ -+#include "os.h" -+ -+/* These are exactly the same definitions as in fs.h, but the names are -+ * changed so that this file can be included in both kernel and user files. -+ */ -+ -+#define HOSTFS_ATTR_MODE 1 -+#define HOSTFS_ATTR_UID 2 -+#define HOSTFS_ATTR_GID 4 -+#define HOSTFS_ATTR_SIZE 8 -+#define HOSTFS_ATTR_ATIME 16 -+#define HOSTFS_ATTR_MTIME 32 -+#define HOSTFS_ATTR_CTIME 64 -+#define HOSTFS_ATTR_ATIME_SET 128 -+#define HOSTFS_ATTR_MTIME_SET 256 -+#define HOSTFS_ATTR_FORCE 512 /* Not a change, but a change it */ -+#define HOSTFS_ATTR_ATTR_FLAG 1024 -+ -+struct hostfs_iattr { -+ unsigned int ia_valid; -+ mode_t ia_mode; -+ uid_t ia_uid; -+ gid_t ia_gid; -+ loff_t ia_size; -+ struct timespec ia_atime; -+ struct timespec ia_mtime; -+ struct timespec ia_ctime; -+ unsigned int ia_attr_flags; -+}; -+ -+extern int stat_file(const char *path, unsigned long long *inode_out, -+ int *mode_out, int *nlink_out, int *uid_out, int *gid_out, -+ unsigned long long *size_out, struct timespec *atime_out, -+ struct timespec *mtime_out, struct timespec *ctime_out, -+ int *blksize_out, unsigned long long *blocks_out); -+extern int access_file(char *path, int r, int w, int x); -+extern int open_file(char *path, int r, int w, int append); -+extern int file_type(const char *path, int *rdev); -+extern void *open_dir(char *path, int *err_out); -+extern char *read_dir(void *stream, unsigned long long *pos, -+ unsigned long long *ino_out, int *len_out); -+extern void close_file(void *stream); -+extern void close_dir(void *stream); 
-+extern int read_file(int fd, unsigned long long *offset, char *buf, int len); -+extern int write_file(int fd, unsigned long long *offset, const char *buf, -+ int len); -+extern int lseek_file(int fd, long long offset, int whence); -+extern int file_create(char *name, int ur, int uw, int ux, int gr, -+ int gw, int gx, int or, int ow, int ox); -+extern int set_attr(const char *file, struct hostfs_iattr *attrs); -+extern int make_symlink(const char *from, const char *to); -+extern int unlink_file(const char *file); -+extern int do_mkdir(const char *file, int mode); -+extern int do_rmdir(const char *file); -+extern int do_mknod(const char *file, int mode, int dev); -+extern int link_file(const char *from, const char *to); -+extern int do_readlink(char *file, char *buf, int size); -+extern int rename_file(char *from, char *to); -+extern int do_statfs(char *root, long *bsize_out, long long *blocks_out, -+ long long *bfree_out, long long *bavail_out, -+ long long *files_out, long long *ffree_out, -+ void *fsid_out, int fsid_size, long *namelen_out, -+ long *spare_out); -+ -+#endif -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. 
-+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -Index: uml-2.6.7/arch/um/kernel/sigio_user.c -=================================================================== ---- uml-2.6.7.orig/arch/um/kernel/sigio_user.c 2004-07-16 19:37:38.307201496 +0300 -+++ uml-2.6.7/arch/um/kernel/sigio_user.c 2004-07-16 19:47:24.458093016 +0300 -@@ -7,7 +7,6 @@ - #include - #include - #include --#include - #include - #include - #include -@@ -17,6 +16,7 @@ - #include "init.h" - #include "user.h" - #include "kern_util.h" -+#include "user_util.h" - #include "sigio.h" - #include "helper.h" - #include "os.h" -@@ -26,7 +26,7 @@ - int pty_close_sigio = 0; - - /* Used as a flag during SIGIO testing early in boot */ --static int got_sigio = 0; -+static volatile int got_sigio = 0; - - void __init handler(int sig) - { -@@ -45,19 +45,18 @@ - - info->err = 0; - if(openpty(&info->master, &info->slave, NULL, NULL, NULL)) -- info->err = errno; -+ info->err = -errno; - } - - void __init check_one_sigio(void (*proc)(int, int)) - { - struct sigaction old, new; -- struct termios tt; - struct openpty_arg pty = { .master = -1, .slave = -1 }; -- int master, slave, flags; -+ int master, slave, err; - - initial_thread_cb(openpty_cb, &pty); - if(pty.err){ -- printk("openpty failed, errno = %d\n", pty.err); -+ printk("openpty failed, errno = %d\n", -pty.err); - return; - } - -@@ -69,23 +68,13 @@ - return; - } - -- if(tcgetattr(master, &tt) < 0) -- panic("check_sigio : tcgetattr failed, errno = %d\n", errno); -- cfmakeraw(&tt); -- if(tcsetattr(master, TCSADRAIN, &tt) < 0) -- panic("check_sigio : tcsetattr failed, errno = %d\n", errno); -- -- if((flags = fcntl(master, F_GETFL)) < 0) -- panic("tty_fds : fcntl F_GETFL failed, errno = %d\n", errno); -- -- if((fcntl(master, F_SETFL, flags | O_NONBLOCK | O_ASYNC) < 0) || -- (fcntl(master, F_SETOWN, os_getpid()) < 0)) -- panic("check_sigio : fcntl F_SETFL or F_SETOWN 
failed, " -- "errno = %d\n", errno); -- -- if((fcntl(slave, F_SETFL, flags | O_NONBLOCK) < 0)) -- panic("check_sigio : fcntl F_SETFL failed, errno = %d\n", -- errno); -+ err = __raw(master, 1, 0); //Not now, but complain so we now where we failed. -+ if (err < 0) -+ panic("check_sigio : __raw failed, errno = %d\n", -err); -+ -+ err = os_sigio_async(master, slave); -+ if(err < 0) -+ panic("tty_fds : sigio_async failed, err = %d\n", -err); - - if(sigaction(SIGIO, NULL, &old) < 0) - panic("check_sigio : sigaction 1 failed, errno = %d\n", errno); -@@ -97,8 +86,8 @@ - got_sigio = 0; - (*proc)(master, slave); - -- close(master); -- close(slave); -+ os_close_file(master); -+ os_close_file(slave); - - if(sigaction(SIGIO, &old, NULL) < 0) - panic("check_sigio : sigaction 3 failed, errno = %d\n", errno); -@@ -112,25 +101,25 @@ - printk("Checking that host ptys support output SIGIO..."); - - memset(buf, 0, sizeof(buf)); -- while(write(master, buf, sizeof(buf)) > 0) ; -+ -+ while(os_write_file(master, buf, sizeof(buf)) > 0) ; - if(errno != EAGAIN) - panic("check_sigio : write failed, errno = %d\n", errno); -- -- while(((n = read(slave, buf, sizeof(buf))) > 0) && !got_sigio) ; -+ while(((n = os_read_file(slave, buf, sizeof(buf))) > 0) && !got_sigio) ; - - if(got_sigio){ - printk("Yes\n"); - pty_output_sigio = 1; - } -- else if(errno == EAGAIN) printk("No, enabling workaround\n"); -- else panic("check_sigio : read failed, errno = %d\n", errno); -+ else if(n == -EAGAIN) printk("No, enabling workaround\n"); -+ else panic("check_sigio : read failed, err = %d\n", n); - } - - static void tty_close(int master, int slave) - { - printk("Checking that host ptys support SIGIO on close..."); - -- close(slave); -+ os_close_file(slave); - if(got_sigio){ - printk("Yes\n"); - pty_close_sigio = 1; -@@ -140,7 +129,8 @@ - - void __init check_sigio(void) - { -- if(access("/dev/ptmx", R_OK) && access("/dev/ptyp0", R_OK)){ -+ if((os_access("/dev/ptmx", OS_ACC_R_OK) < 0) && -+ 
(os_access("/dev/ptyp0", OS_ACC_R_OK) < 0)){ - printk("No pseudo-terminals available - skipping pty SIGIO " - "check\n"); - return; -@@ -201,11 +191,10 @@ - p = &fds->poll[i]; - if(p->revents == 0) continue; - if(p->fd == sigio_private[1]){ -- n = read(sigio_private[1], &c, sizeof(c)); -+ n = os_read_file(sigio_private[1], &c, sizeof(c)); - if(n != sizeof(c)) - printk("write_sigio_thread : " -- "read failed, errno = %d\n", -- errno); -+ "read failed, err = %d\n", -n); - tmp = current_poll; - current_poll = next_poll; - next_poll = tmp; -@@ -218,10 +207,10 @@ - (fds->used - i) * sizeof(*fds->poll)); - } - -- n = write(respond_fd, &c, sizeof(c)); -+ n = os_write_file(respond_fd, &c, sizeof(c)); - if(n != sizeof(c)) - printk("write_sigio_thread : write failed, " -- "errno = %d\n", errno); -+ "err = %d\n", -n); - } - } - } -@@ -252,15 +241,15 @@ - char c; - - flags = set_signals(0); -- n = write(sigio_private[0], &c, sizeof(c)); -+ n = os_write_file(sigio_private[0], &c, sizeof(c)); - if(n != sizeof(c)){ -- printk("update_thread : write failed, errno = %d\n", errno); -+ printk("update_thread : write failed, err = %d\n", -n); - goto fail; - } - -- n = read(sigio_private[0], &c, sizeof(c)); -+ n = os_read_file(sigio_private[0], &c, sizeof(c)); - if(n != sizeof(c)){ -- printk("update_thread : read failed, errno = %d\n", errno); -+ printk("update_thread : read failed, err = %d\n", -n); - goto fail; - } - -@@ -271,10 +260,10 @@ - if(write_sigio_pid != -1) - os_kill_process(write_sigio_pid, 1); - write_sigio_pid = -1; -- close(sigio_private[0]); -- close(sigio_private[1]); -- close(write_sigio_fds[0]); -- close(write_sigio_fds[1]); -+ os_close_file(sigio_private[0]); -+ os_close_file(sigio_private[1]); -+ os_close_file(write_sigio_fds[0]); -+ os_close_file(write_sigio_fds[1]); - sigio_unlock(); - set_signals(flags); - } -@@ -369,15 +358,15 @@ - goto out; - - err = os_pipe(write_sigio_fds, 1, 1); -- if(err){ -+ if(err < 0){ - printk("write_sigio_workaround - os_pipe 1 failed, 
" -- "errno = %d\n", -err); -+ "err = %d\n", -err); - goto out; - } - err = os_pipe(sigio_private, 1, 1); -- if(err){ -+ if(err < 0){ - printk("write_sigio_workaround - os_pipe 2 failed, " -- "errno = %d\n", -err); -+ "err = %d\n", -err); - goto out_close1; - } - if(setup_initial_poll(sigio_private[1])) -@@ -399,11 +388,11 @@ - os_kill_process(write_sigio_pid, 1); - write_sigio_pid = -1; - out_close2: -- close(sigio_private[0]); -- close(sigio_private[1]); -+ os_close_file(sigio_private[0]); -+ os_close_file(sigio_private[1]); - out_close1: -- close(write_sigio_fds[0]); -- close(write_sigio_fds[1]); -+ os_close_file(write_sigio_fds[0]); -+ os_close_file(write_sigio_fds[1]); - sigio_unlock(); - } - -@@ -412,10 +401,16 @@ - int n; - char c; - -- n = read(fd, &c, sizeof(c)); -+ n = os_read_file(fd, &c, sizeof(c)); - if(n != sizeof(c)){ -- printk("read_sigio_fd - read failed, errno = %d\n", errno); -- return(-errno); -+ if(n < 0) { -+ printk("read_sigio_fd - read failed, err = %d\n", -n); -+ return(n); -+ } -+ else { -+ printk("read_sigio_fd - short read, bytes = %d\n", n); -+ return(-EIO); -+ } - } - return(n); - } -Index: uml-2.6.7/include/asm-um/system-i386.h -=================================================================== ---- uml-2.6.7.orig/include/asm-um/system-i386.h 2004-07-16 19:36:25.160321512 +0300 -+++ uml-2.6.7/include/asm-um/system-i386.h 2004-07-16 19:47:23.802192728 +0300 -@@ -2,36 +2,5 @@ - #define __UM_SYSTEM_I386_H - - #include "asm/system-generic.h" -- --static inline unsigned long __cmpxchg(volatile void *ptr, unsigned long old, -- unsigned long new, int size) --{ -- unsigned long prev; -- switch (size) { -- case 1: -- __asm__ __volatile__(LOCK_PREFIX "cmpxchgb %b1,%2" -- : "=a"(prev) -- : "q"(new), "m"(*__xg(ptr)), "0"(old) -- : "memory"); -- return prev; -- case 2: -- __asm__ __volatile__(LOCK_PREFIX "cmpxchgw %w1,%2" -- : "=a"(prev) -- : "q"(new), "m"(*__xg(ptr)), "0"(old) -- : "memory"); -- return prev; -- case 4: -- __asm__ 
__volatile__(LOCK_PREFIX "cmpxchgl %1,%2" -- : "=a"(prev) -- : "q"(new), "m"(*__xg(ptr)), "0"(old) -- : "memory"); -- return prev; -- } -- return old; --} -- --#define cmpxchg(ptr,o,n)\ -- ((__typeof__(*(ptr)))__cmpxchg((ptr),(unsigned long)(o),\ -- (unsigned long)(n),sizeof(*(ptr)))) - - #endif -Index: uml-2.6.7/arch/um/sys-i386/time.c -=================================================================== ---- uml-2.6.7.orig/arch/um/sys-i386/time.c 2004-07-16 19:47:23.629219024 +0300 -+++ uml-2.6.7/arch/um/sys-i386/time.c 2004-07-16 19:47:23.773197136 +0300 -@@ -0,0 +1,24 @@ -+/* -+ * sys-i386/time.c -+ * Created 25.9.2002 Sapan Bhatia -+ * -+ */ -+ -+unsigned long long time_stamp(void) -+{ -+ unsigned long low, high; -+ -+ asm("rdtsc" : "=a" (low), "=d" (high)); -+ return((((unsigned long long) high) << 32) + low); -+} -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. 
-+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -Index: uml-2.6.7/arch/um/Makefile-i386 -=================================================================== ---- uml-2.6.7.orig/arch/um/Makefile-i386 2004-07-16 19:37:17.426375864 +0300 -+++ uml-2.6.7/arch/um/Makefile-i386 2004-07-16 19:47:23.764198504 +0300 -@@ -16,22 +16,27 @@ - - SYS_HEADERS = $(SYS_DIR)/sc.h $(SYS_DIR)/thread.h - -+sys_prepare: $(SYS_DIR)/sc.h -+ - prepare: $(SYS_HEADERS) - -+filechk_$(SYS_DIR)/sc.h := $(SYS_UTIL_DIR)/mk_sc -+ - $(SYS_DIR)/sc.h: $(SYS_UTIL_DIR)/mk_sc -- $< > $@ -+ $(call filechk,$@) -+ -+filechk_$(SYS_DIR)/thread.h := $(SYS_UTIL_DIR)/mk_thread - - $(SYS_DIR)/thread.h: $(SYS_UTIL_DIR)/mk_thread -- $< > $@ -+ $(call filechk,$@) - --$(SYS_UTIL_DIR)/mk_sc: FORCE ; -- @$(call descend,$(SYS_UTIL_DIR),$@) -+$(SYS_UTIL_DIR)/mk_sc: scripts/basic/fixdep include/config/MARKER FORCE ; -+ $(Q)$(MAKE) $(build)=$(SYS_UTIL_DIR) $@ - --$(SYS_UTIL_DIR)/mk_thread: $(ARCH_SYMLINKS) $(GEN_HEADERS) FORCE ; -- @$(call descend,$(SYS_UTIL_DIR),$@) -+$(SYS_UTIL_DIR)/mk_thread: $(ARCH_SYMLINKS) $(GEN_HEADERS) sys_prepare FORCE ; -+ $(Q)$(MAKE) $(build)=$(SYS_UTIL_DIR) $@ - - $(SYS_UTIL_DIR): include/asm FORCE -- @$(call descend,$@,) -+ $(Q)$(MAKE) $(build)=$(SYS_UTIL_DIR) - --sysclean : -- rm -f $(SYS_HEADERS) -+CLEAN_FILES += $(SYS_HEADERS) -Index: uml-2.6.7/include/asm-um/smp.h -=================================================================== ---- uml-2.6.7.orig/include/asm-um/smp.h 2004-07-16 19:35:49.282775728 +0300 -+++ uml-2.6.7/include/asm-um/smp.h 2004-07-16 19:47:23.795193792 +0300 -@@ -10,7 +10,7 @@ - - extern cpumask_t cpu_online_map; - --#define smp_processor_id() (current->thread_info->cpu) -+#define smp_processor_id() (current_thread->cpu) - #define cpu_logical_map(n) (n) - #define cpu_number_map(n) (n) - #define PROC_CHANGE_PENALTY 15 /* Pick a number, any number */ -Index: 
uml-2.6.7/arch/um/Kconfig_block -=================================================================== ---- uml-2.6.7.orig/arch/um/Kconfig_block 2004-07-16 19:37:05.104249112 +0300 -+++ uml-2.6.7/arch/um/Kconfig_block 2004-07-16 19:47:23.711206560 +0300 -@@ -29,6 +29,20 @@ - wise choice too. In all other cases (for example, if you're just - playing around with User-Mode Linux) you can choose N. - -+# Turn this back on when the driver actually works -+# -+#config BLK_DEV_COW -+# tristate "COW block device" -+# help -+# This is a layered driver which sits above two other block devices. -+# One is read-only, and the other is a read-write layer which stores -+# all changes. This provides the illusion that the read-only layer -+# can be mounted read-write and changed. -+ -+config BLK_DEV_COW_COMMON -+ bool -+ default BLK_DEV_COW || BLK_DEV_UBD -+ - config BLK_DEV_LOOP - tristate "Loopback device support" - -Index: uml-2.6.7/arch/um/kernel/initrd_user.c -=================================================================== ---- uml-2.6.7.orig/arch/um/kernel/initrd_user.c 2004-07-16 19:36:07.784962968 +0300 -+++ uml-2.6.7/arch/um/kernel/initrd_user.c 2004-07-16 19:47:23.715205952 +0300 -@@ -6,7 +6,6 @@ - #include - #include - #include --#include - #include - - #include "user_util.h" -@@ -19,13 +18,15 @@ - { - int fd, n; - -- if((fd = os_open_file(filename, of_read(OPENFLAGS()), 0)) < 0){ -- printk("Opening '%s' failed - errno = %d\n", filename, errno); -+ fd = os_open_file(filename, of_read(OPENFLAGS()), 0); -+ if(fd < 0){ -+ printk("Opening '%s' failed - err = %d\n", filename, -fd); - return(-1); - } -- if((n = read(fd, buf, size)) != size){ -- printk("Read of %d bytes from '%s' returned %d, errno = %d\n", -- size, filename, n, errno); -+ n = os_read_file(fd, buf, size); -+ if(n != size){ -+ printk("Read of %d bytes from '%s' failed, err = %d\n", size, -+ filename, -n); - return(-1); - } - return(0); -Index: uml-2.6.7/arch/um/include/sysdep-i386/frame_user.h 
-=================================================================== ---- uml-2.6.7.orig/arch/um/include/sysdep-i386/frame_user.h 2004-07-16 19:37:34.631760248 +0300 -+++ uml-2.6.7/arch/um/include/sysdep-i386/frame_user.h 2004-07-16 19:47:23.707207168 +0300 -@@ -56,26 +56,26 @@ - * it would have to be __builtin_frame_address(1). - */ - --static inline unsigned long frame_restorer(void) --{ -- unsigned long *fp; -- -- fp = __builtin_frame_address(0); -- return((unsigned long) (fp + 1)); --} -+#define frame_restorer() \ -+({ \ -+ unsigned long *fp; \ -+\ -+ fp = __builtin_frame_address(0); \ -+ ((unsigned long) (fp + 1)); \ -+}) - - /* Similarly, this returns the value of sp when the handler was first - * entered. This is used to calculate the proper sp when delivering - * signals. - */ - --static inline unsigned long frame_sp(void) --{ -- unsigned long *fp; -- -- fp = __builtin_frame_address(0); -- return((unsigned long) (fp + 1)); --} -+#define frame_sp() \ -+({ \ -+ unsigned long *fp; \ -+\ -+ fp = __builtin_frame_address(0); \ -+ ((unsigned long) (fp + 1)); \ -+}) - - #endif - -Index: uml-2.6.7/arch/um/drivers/tty.c -=================================================================== ---- uml-2.6.7.orig/arch/um/drivers/tty.c 2004-07-16 19:36:25.316297800 +0300 -+++ uml-2.6.7/arch/um/drivers/tty.c 2004-07-16 19:47:23.695208992 +0300 -@@ -5,7 +5,6 @@ - - #include - #include --#include - #include - #include - #include "chan_user.h" -@@ -30,7 +29,8 @@ - } - str++; - -- if((data = um_kmalloc(sizeof(*data))) == NULL) -+ data = um_kmalloc(sizeof(*data)); -+ if(data == NULL) - return(NULL); - *data = ((struct tty_chan) { .dev = str, - .raw = opts->raw }); -Index: uml-2.6.7/arch/um/drivers/net_kern.c -=================================================================== ---- uml-2.6.7.orig/arch/um/drivers/net_kern.c 2004-07-16 19:36:37.183493712 +0300 -+++ uml-2.6.7/arch/um/drivers/net_kern.c 2004-07-16 19:47:23.689209904 +0300 -@@ -26,6 +26,7 @@ - #include 
"mconsole_kern.h" - #include "init.h" - #include "irq_user.h" -+#include "irq_kern.h" - - static spinlock_t opened_lock = SPIN_LOCK_UNLOCKED; - LIST_HEAD(opened); -@@ -37,7 +38,8 @@ - struct sk_buff *skb; - - /* If we can't allocate memory, try again next round. */ -- if ((skb = dev_alloc_skb(dev->mtu)) == NULL) { -+ skb = dev_alloc_skb(dev->mtu); -+ if (skb == NULL) { - lp->stats.rx_dropped++; - return 0; - } -@@ -61,14 +63,14 @@ - return pkt_len; - } - --void uml_net_interrupt(int irq, void *dev_id, struct pt_regs *regs) -+irqreturn_t uml_net_interrupt(int irq, void *dev_id, struct pt_regs *regs) - { - struct net_device *dev = dev_id; - struct uml_net_private *lp = dev->priv; - int err; - - if(!netif_running(dev)) -- return; -+ return(IRQ_NONE); - - spin_lock(&lp->lock); - while((err = uml_net_rx(dev)) > 0) ; -@@ -83,6 +85,7 @@ - - out: - spin_unlock(&lp->lock); -+ return(IRQ_HANDLED); - } - - static int uml_net_open(struct net_device *dev) -@@ -250,37 +253,6 @@ - #endif - } - --/* -- * default do nothing hard header packet routines for struct net_device init. -- * real ethernet transports will overwrite with real routines. 
-- */ --static int uml_net_hard_header(struct sk_buff *skb, struct net_device *dev, -- unsigned short type, void *daddr, void *saddr, unsigned len) --{ -- return(0); /* no change */ --} -- --static int uml_net_rebuild_header(struct sk_buff *skb) --{ -- return(0); /* ignore */ --} -- --static int uml_net_header_cache(struct neighbour *neigh, struct hh_cache *hh) --{ -- return(-1); /* fail */ --} -- --static void uml_net_header_cache_update(struct hh_cache *hh, -- struct net_device *dev, unsigned char * haddr) --{ -- /* ignore */ --} -- --static int uml_net_header_parse(struct sk_buff *skb, unsigned char *haddr) --{ -- return(0); /* nothing */ --} -- - static spinlock_t devices_lock = SPIN_LOCK_UNLOCKED; - static struct list_head devices = LIST_HEAD_INIT(devices); - -@@ -290,7 +262,7 @@ - struct uml_net *device; - struct net_device *dev; - struct uml_net_private *lp; -- int err, size; -+ int save, err, size; - - size = transport->private_size + sizeof(struct uml_net_private) + - sizeof(((struct uml_net_private *) 0)->user); -@@ -332,12 +304,6 @@ - snprintf(dev->name, sizeof(dev->name), "eth%d", n); - device->dev = dev; - -- dev->hard_header = uml_net_hard_header; -- dev->rebuild_header = uml_net_rebuild_header; -- dev->hard_header_cache = uml_net_header_cache; -- dev->header_cache_update= uml_net_header_cache_update; -- dev->hard_header_parse = uml_net_header_parse; -- - (*transport->kern->init)(dev, init); - - dev->mtu = transport->user->max_packet; -@@ -364,21 +330,29 @@ - } - lp = dev->priv; - -- INIT_LIST_HEAD(&lp->list); -- spin_lock_init(&lp->lock); -- lp->dev = dev; -- lp->fd = -1; -- lp->mac = { 0xfe, 0xfd, 0x0, 0x0, 0x0, 0x0 }; -- lp->have_mac = device->have_mac; -- lp->protocol = transport->kern->protocol; -- lp->open = transport->user->open; -- lp->close = transport->user->close; -- lp->remove = transport->user->remove; -- lp->read = transport->kern->read; -- lp->write = transport->kern->write; -- lp->add_address = transport->user->add_address; -- 
lp->delete_address = transport->user->delete_address; -- lp->set_mtu = transport->user->set_mtu; -+ /* lp.user is the first four bytes of the transport data, which -+ * has already been initialized. This structure assignment will -+ * overwrite that, so we make sure that .user gets overwritten with -+ * what it already has. -+ */ -+ save = lp->user[0]; -+ *lp = ((struct uml_net_private) -+ { .list = LIST_HEAD_INIT(lp->list), -+ .lock = SPIN_LOCK_UNLOCKED, -+ .dev = dev, -+ .fd = -1, -+ .mac = { 0xfe, 0xfd, 0x0, 0x0, 0x0, 0x0}, -+ .have_mac = device->have_mac, -+ .protocol = transport->kern->protocol, -+ .open = transport->user->open, -+ .close = transport->user->close, -+ .remove = transport->user->remove, -+ .read = transport->kern->read, -+ .write = transport->kern->write, -+ .add_address = transport->user->add_address, -+ .delete_address = transport->user->delete_address, -+ .set_mtu = transport->user->set_mtu, -+ .user = { save } }); - - init_timer(&lp->tl); - lp->tl.function = uml_net_user_timer_expire; -@@ -611,7 +585,8 @@ - unregister_netdev(dev); - - list_del(&device->list); -- free_netdev(device); -+ kfree(device); -+ free_netdev(dev); - return(0); - } - -Index: uml-2.6.7/arch/um/kernel/tt/syscall_kern.c -=================================================================== ---- uml-2.6.7.orig/arch/um/kernel/tt/syscall_kern.c 2004-07-16 19:35:49.170792752 +0300 -+++ uml-2.6.7/arch/um/kernel/tt/syscall_kern.c 2004-07-16 19:47:23.749200784 +0300 -@@ -1,5 +1,5 @@ - /* -- * Copyright (C) 2000 Jeff Dike (jdike@karaya.com) -+ * Copyright (C) 2000 - 2003 Jeff Dike (jdike@addtoit.com) - * Licensed under the GPL - */ - -Index: uml-2.6.7/arch/um/kernel/config.c.in -=================================================================== ---- uml-2.6.7.orig/arch/um/kernel/config.c.in 2004-07-16 19:37:08.932667104 +0300 -+++ uml-2.6.7/arch/um/kernel/config.c.in 2004-07-16 19:47:23.712206408 +0300 -@@ -7,9 +7,7 @@ - #include - #include "init.h" - --static __initdata char 
*config = " --CONFIG --"; -+static __initdata char *config = "CONFIG"; - - static int __init print_config(char *line, int *add) - { -Index: uml-2.6.7/arch/um/include/mconsole.h -=================================================================== ---- uml-2.6.7.orig/arch/um/include/mconsole.h 2004-07-16 19:36:32.705174520 +0300 -+++ uml-2.6.7/arch/um/include/mconsole.h 2004-07-16 19:47:23.703207776 +0300 -@@ -41,11 +41,13 @@ - - struct mc_request; - -+enum mc_context { MCONSOLE_INTR, MCONSOLE_PROC }; -+ - struct mconsole_command - { - char *command; - void (*handler)(struct mc_request *req); -- int as_interrupt; -+ enum mc_context context; - }; - - struct mc_request -@@ -77,6 +79,8 @@ - extern void mconsole_cad(struct mc_request *req); - extern void mconsole_stop(struct mc_request *req); - extern void mconsole_go(struct mc_request *req); -+extern void mconsole_log(struct mc_request *req); -+extern void mconsole_proc(struct mc_request *req); - - extern int mconsole_get_request(int fd, struct mc_request *req); - extern int mconsole_notify(char *sock_name, int type, const void *data, -Index: uml-2.6.7/arch/um/kernel/Makefile -=================================================================== ---- uml-2.6.7.orig/arch/um/kernel/Makefile 2004-07-16 19:37:05.102249416 +0300 -+++ uml-2.6.7/arch/um/kernel/Makefile 2004-07-16 19:47:23.718205496 +0300 -@@ -7,11 +7,11 @@ - - obj-y = checksum.o config.o exec_kern.o exitcode.o frame_kern.o frame.o \ - helper.o init_task.o irq.o irq_user.o ksyms.o mem.o mem_user.o \ -- process.o process_kern.o ptrace.o reboot.o resource.o sigio_user.o \ -- sigio_kern.o signal_kern.o signal_user.o smp.o syscall_kern.o \ -- syscall_user.o sysrq.o sys_call_table.o tempfile.o time.o \ -- time_kern.o tlb.o trap_kern.o trap_user.o uaccess_user.o um_arch.o \ -- umid.o user_syms.o user_util.o -+ physmem.o process.o process_kern.o ptrace.o reboot.o resource.o \ -+ sigio_user.o sigio_kern.o signal_kern.o signal_user.o smp.o \ -+ syscall_kern.o 
syscall_user.o sysrq.o sys_call_table.o tempfile.o \ -+ time.o time_kern.o tlb.o trap_kern.o trap_user.o uaccess_user.o \ -+ um_arch.o umid.o user_util.o - - obj-$(CONFIG_BLK_DEV_INITRD) += initrd_kern.o initrd_user.o - obj-$(CONFIG_GPROF) += gprof_syms.o -@@ -24,43 +24,27 @@ - user-objs-$(CONFIG_TTY_LOG) += tty_log.o - - USER_OBJS := $(filter %_user.o,$(obj-y)) $(user-objs-y) config.o helper.o \ -- process.o tempfile.o time.o tty_log.o umid.o user_util.o user_syms.o -+ process.o tempfile.o time.o tty_log.o umid.o user_util.o - USER_OBJS := $(foreach file,$(USER_OBJS),$(obj)/$(file)) - --DMODULES-$(CONFIG_MODULES) = -D__CONFIG_MODULES__ --DMODVERSIONS-$(CONFIG_MODVERSIONS) = -D__CONFIG_MODVERSIONS__ -- -- --CFLAGS_user_syms.o = -D__AUTOCONF_INCLUDED__ $(DMODULES-y) $(DMODVERSIONS-y) \ -- -I/usr/include -I../include -- - CFLAGS_frame.o := $(patsubst -fomit-frame-pointer,,$(USER_CFLAGS)) - --$(USER_OBJS) : %.o: %.c -- $(CC) $(CFLAGS_$(notdir $@)) $(USER_CFLAGS) -c -o $@ $< -- - # This has to be separate because it needs be compiled with frame pointers - # regardless of how the rest of the kernel is built. 
- - $(obj)/frame.o: $(src)/frame.c - $(CC) $(CFLAGS_$(notdir $@)) -c -o $@ $< - --QUOTE = 'my $$config=`cat $(TOPDIR)/.config`; $$config =~ s/"/\\"/g ; while() { $$_ =~ s/CONFIG/$$config/; print $$_ }' -+$(USER_OBJS) : %.o: %.c -+ $(CC) $(CFLAGS_$(notdir $@)) $(USER_CFLAGS) -c -o $@ $< - --$(obj)/config.c : $(src)/config.c.in $(TOPDIR)/.config -- $(PERL) -e $(QUOTE) < $(src)/config.c.in > $@ -+QUOTE = 'my $$config=`cat $(TOPDIR)/.config`; $$config =~ s/"/\\"/g ; $$config =~ s/\n/\\n"\n"/g ; while() { $$_ =~ s/CONFIG/$$config/; print $$_ }' - - $(obj)/config.o : $(obj)/config.c - --clean: -- rm -f config.c -- for dir in $(subdir-y) ; do $(MAKE) -C $$dir clean; done -- --modules: -- --fastdep: -- --dep: -- --archmrproper: clean -+quiet_cmd_quote = QUOTE $@ -+cmd_quote = $(PERL) -e $(QUOTE) < $< > $@ - -+targets += config.c -+$(obj)/config.c : $(src)/config.c.in $(TOPDIR)/.config FORCE -+ $(call if_changed,quote) -Index: uml-2.6.7/arch/um/drivers/xterm_kern.c -=================================================================== ---- uml-2.6.7.orig/arch/um/drivers/xterm_kern.c 2004-07-16 19:37:08.927667864 +0300 -+++ uml-2.6.7/arch/um/drivers/xterm_kern.c 2004-07-16 19:47:23.699208384 +0300 -@@ -5,9 +5,12 @@ - - #include "linux/errno.h" - #include "linux/slab.h" -+#include "linux/signal.h" -+#include "linux/interrupt.h" - #include "asm/semaphore.h" - #include "asm/irq.h" - #include "irq_user.h" -+#include "irq_kern.h" - #include "kern_util.h" - #include "os.h" - #include "xterm.h" -@@ -19,17 +22,18 @@ - int new_fd; - }; - --static void xterm_interrupt(int irq, void *data, struct pt_regs *regs) -+static irqreturn_t xterm_interrupt(int irq, void *data, struct pt_regs *regs) - { - struct xterm_wait *xterm = data; - int fd; - - fd = os_rcv_fd(xterm->fd, &xterm->pid); - if(fd == -EAGAIN) -- return; -+ return(IRQ_NONE); - - xterm->new_fd = fd; - up(&xterm->sem); -+ return(IRQ_HANDLED); - } - - int xterm_fd(int socket, int *pid_out) -@@ -54,7 +58,8 @@ - if(err){ - 
printk(KERN_ERR "xterm_fd : failed to get IRQ for xterm, " - "err = %d\n", err); -- return(err); -+ ret = err; -+ goto out; - } - down(&data->sem); - -@@ -62,6 +67,7 @@ - - ret = data->new_fd; - *pid_out = data->pid; -+ out: - kfree(data); - - return(ret); -Index: uml-2.6.7/arch/um/drivers/cow.h -=================================================================== ---- uml-2.6.7.orig/arch/um/drivers/cow.h 2004-07-16 19:47:23.607222368 +0300 -+++ uml-2.6.7/arch/um/drivers/cow.h 2004-07-16 19:47:23.678211576 +0300 -@@ -0,0 +1,41 @@ -+#ifndef __COW_H__ -+#define __COW_H__ -+ -+#include -+ -+#if __BYTE_ORDER == __BIG_ENDIAN -+# define ntohll(x) (x) -+# define htonll(x) (x) -+#elif __BYTE_ORDER == __LITTLE_ENDIAN -+# define ntohll(x) bswap_64(x) -+# define htonll(x) bswap_64(x) -+#else -+#error "__BYTE_ORDER not defined" -+#endif -+ -+extern int init_cow_file(int fd, char *cow_file, char *backing_file, -+ int sectorsize, int alignment, int *bitmap_offset_out, -+ unsigned long *bitmap_len_out, int *data_offset_out); -+ -+extern int file_reader(__u64 offset, char *buf, int len, void *arg); -+extern int read_cow_header(int (*reader)(__u64, char *, int, void *), -+ void *arg, __u32 *version_out, -+ char **backing_file_out, time_t *mtime_out, -+ __u64 *size_out, int *sectorsize_out, -+ __u32 *align_out, int *bitmap_offset_out); -+ -+extern int write_cow_header(char *cow_file, int fd, char *backing_file, -+ int sectorsize, int alignment, long long *size); -+ -+extern void cow_sizes(int version, __u64 size, int sectorsize, int align, -+ int bitmap_offset, unsigned long *bitmap_len_out, -+ int *data_offset_out); -+ -+#endif -+ -+/* -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -Index: uml-2.6.7/arch/um/sys-ppc/Makefile -=================================================================== ---- uml-2.6.7.orig/arch/um/sys-ppc/Makefile 2004-07-16 19:36:37.299476080 +0300 -+++ 
uml-2.6.7/arch/um/sys-ppc/Makefile 2004-07-16 19:47:23.779196224 +0300 -@@ -66,13 +66,4 @@ - $(CC) $(EXTRA_AFLAGS) $(AFLAGS) -D__ASSEMBLY__ -D__UM_PPC__ -c $< -o $*.o - rm -f asm - --clean: -- rm -f $(OBJS) -- rm -f ppc_defs.h -- rm -f checksum.S semaphore.c mk_defs.c -- --fastdep: -- --dep: -- --modules: -+clean-files := $(OBJS) ppc_defs.h checksum.S semaphore.c mk_defs.c -Index: uml-2.6.7/arch/um/os-Linux/user_syms.c -=================================================================== ---- uml-2.6.7.orig/arch/um/os-Linux/user_syms.c 2004-07-16 19:47:23.629219024 +0300 -+++ uml-2.6.7/arch/um/os-Linux/user_syms.c 2004-07-16 19:47:23.769197744 +0300 -@@ -0,0 +1,88 @@ -+#include "linux/types.h" -+#include "linux/module.h" -+ -+/* Some of this are builtin function (some are not but could in the future), -+ * so I *must* declare good prototypes for them and then EXPORT them. -+ * The kernel code uses the macro defined by include/linux/string.h, -+ * so I undef macros; the userspace code does not include that and I -+ * add an EXPORT for the glibc one.*/ -+ -+#undef strlen -+#undef strstr -+#undef memcpy -+#undef memset -+ -+extern size_t strlen(const char *); -+extern void *memcpy(void *, const void *, size_t); -+extern void *memset(void *, int, size_t); -+extern int printf(const char *, ...); -+ -+EXPORT_SYMBOL(strlen); -+EXPORT_SYMBOL(memcpy); -+EXPORT_SYMBOL(memset); -+EXPORT_SYMBOL(printf); -+ -+EXPORT_SYMBOL(strstr); -+ -+/* Here, instead, I can provide a fake prototype. Yes, someone cares: genksyms. 
-+ * However, the modules will use the CRC defined *here*, no matter if it is -+ * good; so the versions of these symbols will always match -+ */ -+#define EXPORT_SYMBOL_PROTO(sym) \ -+ int sym(void); \ -+ EXPORT_SYMBOL(sym); -+ -+EXPORT_SYMBOL_PROTO(__errno_location); -+ -+EXPORT_SYMBOL_PROTO(access); -+EXPORT_SYMBOL_PROTO(open); -+EXPORT_SYMBOL_PROTO(open64); -+EXPORT_SYMBOL_PROTO(close); -+EXPORT_SYMBOL_PROTO(read); -+EXPORT_SYMBOL_PROTO(write); -+EXPORT_SYMBOL_PROTO(dup2); -+EXPORT_SYMBOL_PROTO(__xstat); -+EXPORT_SYMBOL_PROTO(__lxstat); -+EXPORT_SYMBOL_PROTO(__lxstat64); -+EXPORT_SYMBOL_PROTO(lseek); -+EXPORT_SYMBOL_PROTO(lseek64); -+EXPORT_SYMBOL_PROTO(chown); -+EXPORT_SYMBOL_PROTO(truncate); -+EXPORT_SYMBOL_PROTO(utime); -+EXPORT_SYMBOL_PROTO(chmod); -+EXPORT_SYMBOL_PROTO(rename); -+EXPORT_SYMBOL_PROTO(__xmknod); -+ -+EXPORT_SYMBOL_PROTO(symlink); -+EXPORT_SYMBOL_PROTO(link); -+EXPORT_SYMBOL_PROTO(unlink); -+EXPORT_SYMBOL_PROTO(readlink); -+ -+EXPORT_SYMBOL_PROTO(mkdir); -+EXPORT_SYMBOL_PROTO(rmdir); -+EXPORT_SYMBOL_PROTO(opendir); -+EXPORT_SYMBOL_PROTO(readdir); -+EXPORT_SYMBOL_PROTO(closedir); -+EXPORT_SYMBOL_PROTO(seekdir); -+EXPORT_SYMBOL_PROTO(telldir); -+ -+EXPORT_SYMBOL_PROTO(ioctl); -+ -+EXPORT_SYMBOL_PROTO(pread64); -+EXPORT_SYMBOL_PROTO(pwrite64); -+ -+EXPORT_SYMBOL_PROTO(statfs); -+EXPORT_SYMBOL_PROTO(statfs64); -+ -+EXPORT_SYMBOL_PROTO(getuid); -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. 
-+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -Index: uml-2.6.7/arch/um/include/irq_kern.h -=================================================================== ---- uml-2.6.7.orig/arch/um/include/irq_kern.h 2004-07-16 19:47:23.612221608 +0300 -+++ uml-2.6.7/arch/um/include/irq_kern.h 2004-07-16 19:47:23.701208080 +0300 -@@ -0,0 +1,28 @@ -+/* -+ * Copyright (C) 2001, 2002 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#ifndef __IRQ_KERN_H__ -+#define __IRQ_KERN_H__ -+ -+#include "linux/interrupt.h" -+ -+extern int um_request_irq(unsigned int irq, int fd, int type, -+ irqreturn_t (*handler)(int, void *, -+ struct pt_regs *), -+ unsigned long irqflags, const char * devname, -+ void *dev_id); -+ -+#endif -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. 
-+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -Index: uml-2.6.7/fs/hppfs/hppfs_kern.c -=================================================================== ---- uml-2.6.7.orig/fs/hppfs/hppfs_kern.c 2004-07-16 19:47:23.632218568 +0300 -+++ uml-2.6.7/fs/hppfs/hppfs_kern.c 2004-07-16 19:47:23.785195312 +0300 -@@ -0,0 +1,811 @@ -+/* -+ * Copyright (C) 2002 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include "os.h" -+ -+static int init_inode(struct inode *inode, struct dentry *dentry); -+ -+struct hppfs_data { -+ struct list_head list; -+ char contents[PAGE_SIZE - sizeof(struct list_head)]; -+}; -+ -+struct hppfs_private { -+ struct file proc_file; -+ int host_fd; -+ loff_t len; -+ struct hppfs_data *contents; -+}; -+ -+struct hppfs_inode_info { -+ struct dentry *proc_dentry; -+ struct inode vfs_inode; -+}; -+ -+static inline struct hppfs_inode_info *HPPFS_I(struct inode *inode) -+{ -+ return(list_entry(inode, struct hppfs_inode_info, vfs_inode)); -+} -+ -+#define HPPFS_SUPER_MAGIC 0xb00000ee -+ -+static struct super_operations hppfs_sbops; -+ -+static int is_pid(struct dentry *dentry) -+{ -+ struct super_block *sb; -+ int i; -+ -+ sb = dentry->d_sb; -+ if((sb->s_op != &hppfs_sbops) || (dentry->d_parent != sb->s_root)) -+ return(0); -+ -+ for(i = 0; i < dentry->d_name.len; i++){ -+ if(!isdigit(dentry->d_name.name[i])) -+ return(0); -+ } -+ return(1); -+} -+ -+static char *dentry_name(struct dentry *dentry, int extra) -+{ -+ struct dentry *parent; -+ char *root, *name; -+ const char *seg_name; -+ int len, seg_len; -+ -+ len = 0; -+ parent = dentry; -+ while(parent->d_parent != parent){ -+ if(is_pid(parent)) -+ len += strlen("pid") + 1; -+ else len += parent->d_name.len + 1; -+ parent = parent->d_parent; -+ } -+ -+ root = "proc"; 
-+ len += strlen(root); -+ name = kmalloc(len + extra + 1, GFP_KERNEL); -+ if(name == NULL) return(NULL); -+ -+ name[len] = '\0'; -+ parent = dentry; -+ while(parent->d_parent != parent){ -+ if(is_pid(parent)){ -+ seg_name = "pid"; -+ seg_len = strlen("pid"); -+ } -+ else { -+ seg_name = parent->d_name.name; -+ seg_len = parent->d_name.len; -+ } -+ -+ len -= seg_len + 1; -+ name[len] = '/'; -+ strncpy(&name[len + 1], seg_name, seg_len); -+ parent = parent->d_parent; -+ } -+ strncpy(name, root, strlen(root)); -+ return(name); -+} -+ -+struct dentry_operations hppfs_dentry_ops = { -+}; -+ -+static int file_removed(struct dentry *dentry, const char *file) -+{ -+ char *host_file; -+ int extra, fd; -+ -+ extra = 0; -+ if(file != NULL) extra += strlen(file) + 1; -+ -+ host_file = dentry_name(dentry, extra + strlen("/remove")); -+ if(host_file == NULL){ -+ printk("file_removed : allocation failed\n"); -+ return(-ENOMEM); -+ } -+ -+ if(file != NULL){ -+ strcat(host_file, "/"); -+ strcat(host_file, file); -+ } -+ strcat(host_file, "/remove"); -+ -+ fd = os_open_file(host_file, of_read(OPENFLAGS()), 0); -+ kfree(host_file); -+ if(fd > 0){ -+ os_close_file(fd); -+ return(1); -+ } -+ return(0); -+} -+ -+static void hppfs_read_inode(struct inode *ino) -+{ -+ struct inode *proc_ino; -+ -+ if(HPPFS_I(ino)->proc_dentry == NULL) -+ return; -+ -+ proc_ino = HPPFS_I(ino)->proc_dentry->d_inode; -+ ino->i_uid = proc_ino->i_uid; -+ ino->i_gid = proc_ino->i_gid; -+ ino->i_atime = proc_ino->i_atime; -+ ino->i_mtime = proc_ino->i_mtime; -+ ino->i_ctime = proc_ino->i_ctime; -+ ino->i_ino = proc_ino->i_ino; -+ ino->i_mode = proc_ino->i_mode; -+ ino->i_nlink = proc_ino->i_nlink; -+ ino->i_size = proc_ino->i_size; -+ ino->i_blksize = proc_ino->i_blksize; -+ ino->i_blocks = proc_ino->i_blocks; -+} -+ -+static struct dentry *hppfs_lookup(struct inode *ino, struct dentry *dentry, -+ struct nameidata *nd) -+{ -+ struct dentry *proc_dentry, *new, *parent; -+ struct inode *inode; -+ int err, 
deleted; -+ -+ deleted = file_removed(dentry, NULL); -+ if(deleted < 0) -+ return(ERR_PTR(deleted)); -+ else if(deleted) -+ return(ERR_PTR(-ENOENT)); -+ -+ err = -ENOMEM; -+ parent = HPPFS_I(ino)->proc_dentry; -+ down(&parent->d_inode->i_sem); -+ proc_dentry = d_lookup(parent, &dentry->d_name); -+ if(proc_dentry == NULL){ -+ proc_dentry = d_alloc(parent, &dentry->d_name); -+ if(proc_dentry == NULL){ -+ up(&parent->d_inode->i_sem); -+ goto out; -+ } -+ new = (*parent->d_inode->i_op->lookup)(parent->d_inode, -+ proc_dentry, NULL); -+ if(new){ -+ dput(proc_dentry); -+ proc_dentry = new; -+ } -+ } -+ up(&parent->d_inode->i_sem); -+ -+ if(IS_ERR(proc_dentry)) -+ return(proc_dentry); -+ -+ inode = iget(ino->i_sb, 0); -+ if(inode == NULL) -+ goto out_dput; -+ -+ err = init_inode(inode, proc_dentry); -+ if(err) -+ goto out_put; -+ -+ hppfs_read_inode(inode); -+ -+ d_add(dentry, inode); -+ dentry->d_op = &hppfs_dentry_ops; -+ return(NULL); -+ -+ out_put: -+ iput(inode); -+ out_dput: -+ dput(proc_dentry); -+ out: -+ return(ERR_PTR(err)); -+} -+ -+static struct inode_operations hppfs_file_iops = { -+}; -+ -+static ssize_t read_proc(struct file *file, char *buf, ssize_t count, -+ loff_t *ppos, int is_user) -+{ -+ ssize_t (*read)(struct file *, char *, size_t, loff_t *); -+ ssize_t n; -+ -+ read = file->f_dentry->d_inode->i_fop->read; -+ -+ if(!is_user) -+ set_fs(KERNEL_DS); -+ -+ n = (*read)(file, buf, count, &file->f_pos); -+ -+ if(!is_user) -+ set_fs(USER_DS); -+ -+ if(ppos) *ppos = file->f_pos; -+ return(n); -+} -+ -+static ssize_t hppfs_read_file(int fd, char *buf, ssize_t count) -+{ -+ ssize_t n; -+ int cur, err; -+ char *new_buf; -+ -+ n = -ENOMEM; -+ new_buf = kmalloc(PAGE_SIZE, GFP_KERNEL); -+ if(new_buf == NULL){ -+ printk("hppfs_read_file : kmalloc failed\n"); -+ goto out; -+ } -+ n = 0; -+ while(count > 0){ -+ cur = min_t(ssize_t, count, PAGE_SIZE); -+ err = os_read_file(fd, new_buf, cur); -+ if(err < 0){ -+ printk("hppfs_read : read failed, errno = %d\n", -+ 
count); -+ n = err; -+ goto out_free; -+ } -+ else if(err == 0) -+ break; -+ -+ if(copy_to_user(buf, new_buf, err)){ -+ n = -EFAULT; -+ goto out_free; -+ } -+ n += err; -+ count -= err; -+ } -+ out_free: -+ kfree(new_buf); -+ out: -+ return(n); -+} -+ -+static ssize_t hppfs_read(struct file *file, char *buf, size_t count, -+ loff_t *ppos) -+{ -+ struct hppfs_private *hppfs = file->private_data; -+ struct hppfs_data *data; -+ loff_t off; -+ int err; -+ -+ if(hppfs->contents != NULL){ -+ if(*ppos >= hppfs->len) return(0); -+ -+ data = hppfs->contents; -+ off = *ppos; -+ while(off >= sizeof(data->contents)){ -+ data = list_entry(data->list.next, struct hppfs_data, -+ list); -+ off -= sizeof(data->contents); -+ } -+ -+ if(off + count > hppfs->len) -+ count = hppfs->len - off; -+ copy_to_user(buf, &data->contents[off], count); -+ *ppos += count; -+ } -+ else if(hppfs->host_fd != -1){ -+ err = os_seek_file(hppfs->host_fd, *ppos); -+ if(err){ -+ printk("hppfs_read : seek failed, errno = %d\n", err); -+ return(err); -+ } -+ count = hppfs_read_file(hppfs->host_fd, buf, count); -+ if(count > 0) -+ *ppos += count; -+ } -+ else count = read_proc(&hppfs->proc_file, buf, count, ppos, 1); -+ -+ return(count); -+} -+ -+static ssize_t hppfs_write(struct file *file, const char *buf, size_t len, -+ loff_t *ppos) -+{ -+ struct hppfs_private *data = file->private_data; -+ struct file *proc_file = &data->proc_file; -+ ssize_t (*write)(struct file *, const char *, size_t, loff_t *); -+ int err; -+ -+ write = proc_file->f_dentry->d_inode->i_fop->write; -+ -+ proc_file->f_pos = file->f_pos; -+ err = (*write)(proc_file, buf, len, &proc_file->f_pos); -+ file->f_pos = proc_file->f_pos; -+ -+ return(err); -+} -+ -+static int open_host_sock(char *host_file, int *filter_out) -+{ -+ char *end; -+ int fd; -+ -+ end = &host_file[strlen(host_file)]; -+ strcpy(end, "/rw"); -+ *filter_out = 1; -+ fd = os_connect_socket(host_file); -+ if(fd > 0) -+ return(fd); -+ -+ strcpy(end, "/r"); -+ *filter_out = 
0; -+ fd = os_connect_socket(host_file); -+ return(fd); -+} -+ -+static void free_contents(struct hppfs_data *head) -+{ -+ struct hppfs_data *data; -+ struct list_head *ele, *next; -+ -+ if(head == NULL) return; -+ -+ list_for_each_safe(ele, next, &head->list){ -+ data = list_entry(ele, struct hppfs_data, list); -+ kfree(data); -+ } -+ kfree(head); -+} -+ -+static struct hppfs_data *hppfs_get_data(int fd, int filter, -+ struct file *proc_file, -+ struct file *hppfs_file, -+ loff_t *size_out) -+{ -+ struct hppfs_data *data, *new, *head; -+ int n, err; -+ -+ err = -ENOMEM; -+ data = kmalloc(sizeof(*data), GFP_KERNEL); -+ if(data == NULL){ -+ printk("hppfs_get_data : head allocation failed\n"); -+ goto failed; -+ } -+ -+ INIT_LIST_HEAD(&data->list); -+ -+ head = data; -+ *size_out = 0; -+ -+ if(filter){ -+ while((n = read_proc(proc_file, data->contents, -+ sizeof(data->contents), NULL, 0)) > 0) -+ os_write_file(fd, data->contents, n); -+ err = os_shutdown_socket(fd, 0, 1); -+ if(err){ -+ printk("hppfs_get_data : failed to shut down " -+ "socket\n"); -+ goto failed_free; -+ } -+ } -+ while(1){ -+ n = os_read_file(fd, data->contents, sizeof(data->contents)); -+ if(n < 0){ -+ err = n; -+ printk("hppfs_get_data : read failed, errno = %d\n", -+ err); -+ goto failed_free; -+ } -+ else if(n == 0) -+ break; -+ -+ *size_out += n; -+ -+ if(n < sizeof(data->contents)) -+ break; -+ -+ new = kmalloc(sizeof(*data), GFP_KERNEL); -+ if(new == 0){ -+ printk("hppfs_get_data : data allocation failed\n"); -+ err = -ENOMEM; -+ goto failed_free; -+ } -+ -+ INIT_LIST_HEAD(&new->list); -+ list_add(&new->list, &data->list); -+ data = new; -+ } -+ return(head); -+ -+ failed_free: -+ free_contents(head); -+ failed: -+ return(ERR_PTR(err)); -+} -+ -+static struct hppfs_private *hppfs_data(void) -+{ -+ struct hppfs_private *data; -+ -+ data = kmalloc(sizeof(*data), GFP_KERNEL); -+ if(data == NULL) -+ return(data); -+ -+ *data = ((struct hppfs_private ) { .host_fd = -1, -+ .len = -1, -+ .contents 
= NULL } ); -+ return(data); -+} -+ -+static int file_mode(int fmode) -+{ -+ if(fmode == (FMODE_READ | FMODE_WRITE)) -+ return(O_RDWR); -+ if(fmode == FMODE_READ) -+ return(O_RDONLY); -+ if(fmode == FMODE_WRITE) -+ return(O_WRONLY); -+ return(0); -+} -+ -+static int hppfs_open(struct inode *inode, struct file *file) -+{ -+ struct hppfs_private *data; -+ struct dentry *proc_dentry; -+ char *host_file; -+ int err, fd, type, filter; -+ -+ err = -ENOMEM; -+ data = hppfs_data(); -+ if(data == NULL) -+ goto out; -+ -+ host_file = dentry_name(file->f_dentry, strlen("/rw")); -+ if(host_file == NULL) -+ goto out_free2; -+ -+ proc_dentry = HPPFS_I(inode)->proc_dentry; -+ -+ /* XXX This isn't closed anywhere */ -+ err = open_private_file(&data->proc_file, proc_dentry, -+ file_mode(file->f_mode)); -+ if(err) -+ goto out_free1; -+ -+ type = os_file_type(host_file); -+ if(type == OS_TYPE_FILE){ -+ fd = os_open_file(host_file, of_read(OPENFLAGS()), 0); -+ if(fd >= 0) -+ data->host_fd = fd; -+ else printk("hppfs_open : failed to open '%s', errno = %d\n", -+ host_file, -fd); -+ -+ data->contents = NULL; -+ } -+ else if(type == OS_TYPE_DIR){ -+ fd = open_host_sock(host_file, &filter); -+ if(fd > 0){ -+ data->contents = hppfs_get_data(fd, filter, -+ &data->proc_file, -+ file, &data->len); -+ if(!IS_ERR(data->contents)) -+ data->host_fd = fd; -+ } -+ else printk("hppfs_open : failed to open a socket in " -+ "'%s', errno = %d\n", host_file, -fd); -+ } -+ kfree(host_file); -+ -+ file->private_data = data; -+ return(0); -+ -+ out_free1: -+ kfree(host_file); -+ out_free2: -+ free_contents(data->contents); -+ kfree(data); -+ out: -+ return(err); -+} -+ -+static int hppfs_dir_open(struct inode *inode, struct file *file) -+{ -+ struct hppfs_private *data; -+ struct dentry *proc_dentry; -+ int err; -+ -+ err = -ENOMEM; -+ data = hppfs_data(); -+ if(data == NULL) -+ goto out; -+ -+ proc_dentry = HPPFS_I(inode)->proc_dentry; -+ err = open_private_file(&data->proc_file, proc_dentry, -+ 
file_mode(file->f_mode)); -+ if(err) -+ goto out_free; -+ -+ file->private_data = data; -+ return(0); -+ -+ out_free: -+ kfree(data); -+ out: -+ return(err); -+} -+ -+static loff_t hppfs_llseek(struct file *file, loff_t off, int where) -+{ -+ struct hppfs_private *data = file->private_data; -+ struct file *proc_file = &data->proc_file; -+ loff_t (*llseek)(struct file *, loff_t, int); -+ loff_t ret; -+ -+ llseek = proc_file->f_dentry->d_inode->i_fop->llseek; -+ if(llseek != NULL){ -+ ret = (*llseek)(proc_file, off, where); -+ if(ret < 0) -+ return(ret); -+ } -+ -+ return(default_llseek(file, off, where)); -+} -+ -+static struct file_operations hppfs_file_fops = { -+ .owner = NULL, -+ .llseek = hppfs_llseek, -+ .read = hppfs_read, -+ .write = hppfs_write, -+ .open = hppfs_open, -+}; -+ -+struct hppfs_dirent { -+ void *vfs_dirent; -+ filldir_t filldir; -+ struct dentry *dentry; -+}; -+ -+static int hppfs_filldir(void *d, const char *name, int size, -+ loff_t offset, ino_t inode, unsigned int type) -+{ -+ struct hppfs_dirent *dirent = d; -+ -+ if(file_removed(dirent->dentry, name)) -+ return(0); -+ -+ return((*dirent->filldir)(dirent->vfs_dirent, name, size, offset, -+ inode, type)); -+} -+ -+static int hppfs_readdir(struct file *file, void *ent, filldir_t filldir) -+{ -+ struct hppfs_private *data = file->private_data; -+ struct file *proc_file = &data->proc_file; -+ int (*readdir)(struct file *, void *, filldir_t); -+ struct hppfs_dirent dirent = ((struct hppfs_dirent) -+ { .vfs_dirent = ent, -+ .filldir = filldir, -+ .dentry = file->f_dentry } ); -+ int err; -+ -+ readdir = proc_file->f_dentry->d_inode->i_fop->readdir; -+ -+ proc_file->f_pos = file->f_pos; -+ err = (*readdir)(proc_file, &dirent, hppfs_filldir); -+ file->f_pos = proc_file->f_pos; -+ -+ return(err); -+} -+ -+static int hppfs_fsync(struct file *file, struct dentry *dentry, int datasync) -+{ -+ return(0); -+} -+ -+static struct file_operations hppfs_dir_fops = { -+ .owner = NULL, -+ .readdir = 
hppfs_readdir, -+ .open = hppfs_dir_open, -+ .fsync = hppfs_fsync, -+}; -+ -+static int hppfs_statfs(struct super_block *sb, struct kstatfs *sf) -+{ -+ sf->f_blocks = 0; -+ sf->f_bfree = 0; -+ sf->f_bavail = 0; -+ sf->f_files = 0; -+ sf->f_ffree = 0; -+ sf->f_type = HPPFS_SUPER_MAGIC; -+ return(0); -+} -+ -+static struct inode *hppfs_alloc_inode(struct super_block *sb) -+{ -+ struct hppfs_inode_info *hi; -+ -+ hi = kmalloc(sizeof(*hi), GFP_KERNEL); -+ if(hi == NULL) -+ return(NULL); -+ -+ *hi = ((struct hppfs_inode_info) { .proc_dentry = NULL }); -+ inode_init_once(&hi->vfs_inode); -+ return(&hi->vfs_inode); -+} -+ -+void hppfs_delete_inode(struct inode *ino) -+{ -+ clear_inode(ino); -+} -+ -+static void hppfs_destroy_inode(struct inode *inode) -+{ -+ kfree(HPPFS_I(inode)); -+} -+ -+static struct super_operations hppfs_sbops = { -+ .alloc_inode = hppfs_alloc_inode, -+ .destroy_inode = hppfs_destroy_inode, -+ .read_inode = hppfs_read_inode, -+ .delete_inode = hppfs_delete_inode, -+ .statfs = hppfs_statfs, -+}; -+ -+static int hppfs_readlink(struct dentry *dentry, char *buffer, int buflen) -+{ -+ struct file proc_file; -+ struct dentry *proc_dentry; -+ int (*readlink)(struct dentry *, char *, int); -+ int err, n; -+ -+ proc_dentry = HPPFS_I(dentry->d_inode)->proc_dentry; -+ err = open_private_file(&proc_file, proc_dentry, O_RDONLY); -+ if(err) -+ return(err); -+ -+ readlink = proc_dentry->d_inode->i_op->readlink; -+ n = (*readlink)(proc_dentry, buffer, buflen); -+ -+ close_private_file(&proc_file); -+ -+ return(n); -+} -+ -+static int hppfs_follow_link(struct dentry *dentry, struct nameidata *nd) -+{ -+ struct file proc_file; -+ struct dentry *proc_dentry; -+ int (*follow_link)(struct dentry *, struct nameidata *); -+ int err, n; -+ -+ proc_dentry = HPPFS_I(dentry->d_inode)->proc_dentry; -+ err = open_private_file(&proc_file, proc_dentry, O_RDONLY); -+ if(err) -+ return(err); -+ -+ follow_link = proc_dentry->d_inode->i_op->follow_link; -+ n = 
(*follow_link)(proc_dentry, nd); -+ -+ close_private_file(&proc_file); -+ -+ return(n); -+} -+ -+static struct inode_operations hppfs_dir_iops = { -+ .lookup = hppfs_lookup, -+}; -+ -+static struct inode_operations hppfs_link_iops = { -+ .readlink = hppfs_readlink, -+ .follow_link = hppfs_follow_link, -+}; -+ -+static int init_inode(struct inode *inode, struct dentry *dentry) -+{ -+ if(S_ISDIR(dentry->d_inode->i_mode)){ -+ inode->i_op = &hppfs_dir_iops; -+ inode->i_fop = &hppfs_dir_fops; -+ } -+ else if(S_ISLNK(dentry->d_inode->i_mode)){ -+ inode->i_op = &hppfs_link_iops; -+ inode->i_fop = &hppfs_file_fops; -+ } -+ else { -+ inode->i_op = &hppfs_file_iops; -+ inode->i_fop = &hppfs_file_fops; -+ } -+ -+ HPPFS_I(inode)->proc_dentry = dentry; -+ -+ return(0); -+} -+ -+static int hppfs_fill_super(struct super_block *sb, void *d, int silent) -+{ -+ struct inode *root_inode; -+ struct file_system_type *procfs; -+ struct super_block *proc_sb; -+ int err; -+ -+ err = -ENOENT; -+ procfs = get_fs_type("proc"); -+ if(procfs == NULL) -+ goto out; -+ -+ if(list_empty(&procfs->fs_supers)) -+ goto out; -+ -+ proc_sb = list_entry(procfs->fs_supers.next, struct super_block, -+ s_instances); -+ -+ sb->s_blocksize = 1024; -+ sb->s_blocksize_bits = 10; -+ sb->s_magic = HPPFS_SUPER_MAGIC; -+ sb->s_op = &hppfs_sbops; -+ -+ root_inode = iget(sb, 0); -+ if(root_inode == NULL) -+ goto out; -+ -+ err = init_inode(root_inode, proc_sb->s_root); -+ if(err) -+ goto out_put; -+ -+ err = -ENOMEM; -+ sb->s_root = d_alloc_root(root_inode); -+ if(sb->s_root == NULL) -+ goto out_put; -+ -+ hppfs_read_inode(root_inode); -+ -+ return(0); -+ -+ out_put: -+ iput(root_inode); -+ out: -+ return(err); -+} -+ -+static struct super_block *hppfs_read_super(struct file_system_type *type, -+ int flags, const char *dev_name, -+ void *data) -+{ -+ return(get_sb_nodev(type, flags, data, hppfs_fill_super)); -+} -+ -+static struct file_system_type hppfs_type = { -+ .owner = THIS_MODULE, -+ .name = "hppfs", -+ .get_sb 
= hppfs_read_super, -+ .kill_sb = kill_anon_super, -+ .fs_flags = 0, -+}; -+ -+static int __init init_hppfs(void) -+{ -+ return(register_filesystem(&hppfs_type)); -+} -+ -+static void __exit exit_hppfs(void) -+{ -+ unregister_filesystem(&hppfs_type); -+} -+ -+module_init(init_hppfs) -+module_exit(exit_hppfs) -+MODULE_LICENSE("GPL"); -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -Index: uml-2.6.7/arch/um/drivers/ssl.c -=================================================================== ---- uml-2.6.7.orig/arch/um/drivers/ssl.c 2004-07-16 19:36:42.449693128 +0300 -+++ uml-2.6.7/arch/um/drivers/ssl.c 2004-07-16 19:47:23.694209144 +0300 -@@ -10,6 +10,7 @@ - #include "linux/major.h" - #include "linux/mm.h" - #include "linux/init.h" -+#include "linux/console.h" - #include "asm/termbits.h" - #include "asm/irq.h" - #include "line.h" -@@ -53,8 +54,9 @@ - - static struct line_driver driver = { - .name = "UML serial line", -- .devfs_name = "tts/%d", -- .major = TTYAUX_MAJOR, -+ .device_name = "ttS", -+ .devfs_name = "tts/", -+ .major = TTY_MAJOR, - .minor_start = 64, - .type = TTY_DRIVER_TYPE_SERIAL, - .subtype = 0, -@@ -149,6 +151,9 @@ - case TCSETSW: - case TCGETA: - case TIOCMGET: -+ case TCSBRK: -+ case TCSBRKP: -+ case TIOCMSET: - ret = -ENOIOCTLCMD; - break; - default: -@@ -212,6 +217,37 @@ - */ - static int ssl_init_done = 0; - -+static void ssl_console_write(struct console *c, const char *string, -+ unsigned len) -+{ -+ struct line *line = &serial_lines[c->index]; -+ if(ssl_init_done) -+ down(&line->sem); -+ console_write_chan(&line->chan_list, string, len); -+ if(ssl_init_done) -+ up(&line->sem); -+} -+ -+static struct tty_driver 
*ssl_console_device(struct console *c, int *index) -+{ -+ *index = c->index; -+ return ssl_driver; -+} -+ -+static int ssl_console_setup(struct console *co, char *options) -+{ -+ return(0); -+} -+ -+static struct console ssl_cons = { -+ name: "ttyS", -+ write: ssl_console_write, -+ device: ssl_console_device, -+ setup: ssl_console_setup, -+ flags: CON_PRINTBUFFER, -+ index: -1, -+}; -+ - int ssl_init(void) - { - char *new_title; -@@ -227,17 +263,18 @@ - new_title = add_xterm_umid(opts.xterm_title); - if(new_title != NULL) opts.xterm_title = new_title; - -+ register_console(&ssl_cons); - ssl_init_done = 1; - return(0); - } - --__initcall(ssl_init); -+late_initcall(ssl_init); - - static int ssl_chan_setup(char *str) - { -- line_setup(serial_lines, sizeof(serial_lines)/sizeof(serial_lines[0]), -- str, 1); -- return(1); -+ return(line_setup(serial_lines, -+ sizeof(serial_lines)/sizeof(serial_lines[0]), -+ str, 1)); - } - - __setup("ssl", ssl_chan_setup); -Index: uml-2.6.7/arch/um/kernel/tt/Makefile -=================================================================== ---- uml-2.6.7.orig/arch/um/kernel/tt/Makefile 2004-07-16 19:36:45.804183168 +0300 -+++ uml-2.6.7/arch/um/kernel/tt/Makefile 2004-07-16 19:47:23.744201544 +0300 -@@ -1,5 +1,5 @@ - # --# Copyright (C) 2002 Jeff Dike (jdike@karaya.com) -+# Copyright (C) 2002 - 2003 Jeff Dike (jdike@addtoit.com) - # Licensed under the GPL - # - -@@ -7,7 +7,7 @@ - - obj-y = exec_kern.o exec_user.o gdb.o ksyms.o mem.o mem_user.o process_kern.o \ - syscall_kern.o syscall_user.o time.o tlb.o tracer.o trap_user.o \ -- uaccess_user.o sys-$(SUBARCH)/ -+ uaccess.o uaccess_user.o sys-$(SUBARCH)/ - - obj-$(CONFIG_PT_PROXY) += gdb_kern.o ptproxy/ - -@@ -27,5 +27,3 @@ - - $(obj)/unmap_fin.o : $(src)/unmap.o - ld -r -o $@ $< -lc -L/usr/lib -- --clean : -Index: uml-2.6.7/fs/Makefile -=================================================================== ---- uml-2.6.7.orig/fs/Makefile 2004-07-16 19:36:51.434327256 +0300 -+++ 
uml-2.6.7/fs/Makefile 2004-07-16 19:47:23.786195160 +0300 -@@ -91,3 +91,5 @@ - obj-$(CONFIG_XFS_FS) += xfs/ - obj-$(CONFIG_AFS_FS) += afs/ - obj-$(CONFIG_BEFS_FS) += befs/ -+obj-$(CONFIG_HOSTFS) += hostfs/ -+obj-$(CONFIG_HPPFS) += hppfs/ -Index: uml-2.6.7/arch/um/kernel/tt/tlb.c -=================================================================== ---- uml-2.6.7.orig/arch/um/kernel/tt/tlb.c 2004-07-16 19:35:58.497374896 +0300 -+++ uml-2.6.7/arch/um/kernel/tt/tlb.c 2004-07-16 19:47:23.750200632 +0300 -@@ -10,6 +10,7 @@ - #include "asm/page.h" - #include "asm/pgtable.h" - #include "asm/uaccess.h" -+#include "asm/tlbflush.h" - #include "user_util.h" - #include "mem_user.h" - #include "os.h" -Index: uml-2.6.7/arch/um/kernel/reboot.c -=================================================================== ---- uml-2.6.7.orig/arch/um/kernel/reboot.c 2004-07-16 19:36:45.791185144 +0300 -+++ uml-2.6.7/arch/um/kernel/reboot.c 2004-07-16 19:47:23.724204584 +0300 -@@ -15,6 +15,7 @@ - #ifdef CONFIG_SMP - static void kill_idlers(int me) - { -+#ifdef CONFIG_MODE_TT - struct task_struct *p; - int i; - -@@ -23,6 +24,7 @@ - if((p != NULL) && (p->thread.mode.tt.extern_pid != me)) - os_kill_process(p->thread.mode.tt.extern_pid, 0); - } -+#endif - } - #endif - -Index: uml-2.6.7/arch/um/kernel/ptrace.c -=================================================================== ---- uml-2.6.7.orig/arch/um/kernel/ptrace.c 2004-07-16 19:36:04.815414408 +0300 -+++ uml-2.6.7/arch/um/kernel/ptrace.c 2004-07-16 19:47:23.724204584 +0300 -@@ -24,11 +24,6 @@ - { - } - --extern long do_mmap2(struct task_struct *task, unsigned long addr, -- unsigned long len, unsigned long prot, -- unsigned long flags, unsigned long fd, -- unsigned long pgoff); -- - int sys_ptrace(long request, long pid, long addr, long data) - { - struct task_struct *child; -@@ -302,8 +297,17 @@ - return ret; - } - --void syscall_trace(void) -+void syscall_trace(union uml_pt_regs *regs, int entryexit) - { -+ if 
(unlikely(current->audit_context)) { -+ if (!entryexit) -+ audit_syscall_entry(current, regs->orig_eax, -+ regs->ebx, regs->ecx, -+ regs->edx, regs->esi); -+ else -+ audit_syscall_exit(current, regs->eax); -+ } -+ - if (!test_thread_flag(TIF_SYSCALL_TRACE)) - return; - if (!(current->ptrace & PT_PTRACED)) -@@ -311,11 +315,8 @@ - - /* the 0x80 provides a way for the tracing parent to distinguish - between a syscall stop and SIGTRAP delivery */ -- current->exit_code = SIGTRAP | ((current->ptrace & PT_TRACESYSGOOD) -- ? 0x80 : 0); -- current->state = TASK_STOPPED; -- notify_parent(current, SIGCHLD); -- schedule(); -+ ptrace_notify(SIGTRAP | ((current->ptrace & PT_TRACESYSGOOD) -+ ? 0x80 : 0)); - - /* - * this isn't the same as continuing with a signal, but it will do -Index: uml-2.6.7/arch/um/include/um_uaccess.h -=================================================================== ---- uml-2.6.7.orig/arch/um/include/um_uaccess.h 2004-07-16 19:37:08.610716048 +0300 -+++ uml-2.6.7/arch/um/include/um_uaccess.h 2004-07-16 19:47:23.709206864 +0300 -@@ -38,22 +38,73 @@ - from, n)); - } - -+/* -+ * strncpy_from_user: - Copy a NUL terminated string from userspace. -+ * @dst: Destination address, in kernel space. This buffer must be at -+ * least @count bytes long. -+ * @src: Source address, in user space. -+ * @count: Maximum number of bytes to copy, including the trailing NUL. -+ * -+ * Copies a NUL-terminated string from userspace to kernel space. -+ * -+ * On success, returns the length of the string (not including the trailing -+ * NUL). -+ * -+ * If access to userspace fails, returns -EFAULT (some data may have been -+ * copied). -+ * -+ * If @count is smaller than the length of the string, copies @count bytes -+ * and returns @count. 
-+ */ -+ - static inline int strncpy_from_user(char *dst, const char *src, int count) - { - return(CHOOSE_MODE_PROC(strncpy_from_user_tt, strncpy_from_user_skas, - dst, src, count)); - } - -+/* -+ * __clear_user: - Zero a block of memory in user space, with less checking. -+ * @to: Destination address, in user space. -+ * @n: Number of bytes to zero. -+ * -+ * Zero a block of memory in user space. Caller must check -+ * the specified block with access_ok() before calling this function. -+ * -+ * Returns number of bytes that could not be cleared. -+ * On success, this will be zero. -+ */ - static inline int __clear_user(void *mem, int len) - { - return(CHOOSE_MODE_PROC(__clear_user_tt, __clear_user_skas, mem, len)); - } - -+/* -+ * clear_user: - Zero a block of memory in user space. -+ * @to: Destination address, in user space. -+ * @n: Number of bytes to zero. -+ * -+ * Zero a block of memory in user space. -+ * -+ * Returns number of bytes that could not be cleared. -+ * On success, this will be zero. -+ */ - static inline int clear_user(void *mem, int len) - { - return(CHOOSE_MODE_PROC(clear_user_tt, clear_user_skas, mem, len)); - } - -+/* -+ * strlen_user: - Get the size of a string in user space. -+ * @str: The string to measure. -+ * @n: The maximum valid length -+ * -+ * Get the size of a NUL-terminated string in user space. -+ * -+ * Returns the size of the string INCLUDING the terminating NUL. -+ * On exception, returns 0. -+ * If the string is too long, returns a value greater than @n. 
-+ */ - static inline int strnlen_user(const void *str, int len) - { - return(CHOOSE_MODE_PROC(strnlen_user_tt, strnlen_user_skas, str, len)); -Index: uml-2.6.7/include/asm-um/current.h -=================================================================== ---- uml-2.6.7.orig/include/asm-um/current.h 2004-07-16 19:35:58.909312272 +0300 -+++ uml-2.6.7/include/asm-um/current.h 2004-07-16 19:47:23.788194856 +0300 -@@ -16,8 +16,10 @@ - #define CURRENT_THREAD(dummy) (((unsigned long) &dummy) & \ - (PAGE_MASK << CONFIG_KERNEL_STACK_ORDER)) - --#define current ({ int dummy; \ -- ((struct thread_info *) CURRENT_THREAD(dummy))->task; }) -+#define current_thread \ -+ ({ int dummy; ((struct thread_info *) CURRENT_THREAD(dummy)); }) -+ -+#define current (current_thread->task) - - #endif /* __ASSEMBLY__ */ - -Index: uml-2.6.7/arch/um/sys-i386/bugs.c -=================================================================== ---- uml-2.6.7.orig/arch/um/sys-i386/bugs.c 2004-07-16 19:37:17.366384984 +0300 -+++ uml-2.6.7/arch/um/sys-i386/bugs.c 2004-07-16 19:47:23.770197592 +0300 -@@ -4,20 +4,21 @@ - */ - - #include --#include - #include - #include - #include -+#include - #include "kern_util.h" - #include "user.h" - #include "sysdep/ptrace.h" - #include "task.h" -+#include "os.h" - - #define MAXTOKEN 64 - - /* Set during early boot */ --int cpu_has_cmov = 1; --int cpu_has_xmm = 0; -+int host_has_cmov = 1; -+int host_has_xmm = 0; - - static char token(int fd, char *buf, int len, char stop) - { -@@ -27,13 +28,15 @@ - ptr = buf; - end = &buf[len]; - do { -- n = read(fd, ptr, sizeof(*ptr)); -+ n = os_read_file(fd, ptr, sizeof(*ptr)); - c = *ptr++; -- if(n == 0) return(0); -- else if(n != sizeof(*ptr)){ -- printk("Reading /proc/cpuinfo failed, " -- "errno = %d\n", errno); -- return(-errno); -+ if(n != sizeof(*ptr)){ -+ if(n == 0) return(0); -+ printk("Reading /proc/cpuinfo failed, err = %d\n", -n); -+ if(n < 0) -+ return(n); -+ else -+ return(-EIO); - } - } while((c != '\n') && (c != stop) && 
(ptr < end)); - -@@ -45,45 +48,79 @@ - return(c); - } - --static int check_cpu_feature(char *feature, int *have_it) -+static int find_cpuinfo_line(int fd, char *key, char *scratch, int len) - { -- char buf[MAXTOKEN], c; -- int fd, len = sizeof(buf)/sizeof(buf[0]), n; -- -- printk("Checking for host processor %s support...", feature); -- fd = open("/proc/cpuinfo", O_RDONLY); -- if(fd < 0){ -- printk("Couldn't open /proc/cpuinfo, errno = %d\n", errno); -- return(0); -- } -+ int n; -+ char c; - -- *have_it = 0; -- buf[len - 1] = '\0'; -+ scratch[len - 1] = '\0'; - while(1){ -- c = token(fd, buf, len - 1, ':'); -- if(c <= 0) goto out; -+ c = token(fd, scratch, len - 1, ':'); -+ if(c <= 0) -+ return(0); - else if(c != ':'){ - printk("Failed to find ':' in /proc/cpuinfo\n"); -- goto out; -+ return(0); - } - -- if(!strncmp(buf, "flags", strlen("flags"))) break; -+ if(!strncmp(scratch, key, strlen(key))) -+ return(1); - - do { -- n = read(fd, &c, sizeof(c)); -+ n = os_read_file(fd, &c, sizeof(c)); - if(n != sizeof(c)){ - printk("Failed to find newline in " -- "/proc/cpuinfo, n = %d, errno = %d\n", -- n, errno); -- goto out; -+ "/proc/cpuinfo, err = %d\n", -n); -+ return(0); - } - } while(c != '\n'); - } -+ return(0); -+} -+ -+int cpu_feature(char *what, char *buf, int len) -+{ -+ int fd, ret = 0; -+ -+ fd = os_open_file("/proc/cpuinfo", of_read(OPENFLAGS()), 0); -+ if(fd < 0){ -+ printk("Couldn't open /proc/cpuinfo, err = %d\n", -fd); -+ return(0); -+ } -+ -+ if(!find_cpuinfo_line(fd, what, buf, len)){ -+ printk("Couldn't find '%s' line in /proc/cpuinfo\n", what); -+ goto out_close; -+ } -+ -+ token(fd, buf, len, '\n'); -+ ret = 1; -+ -+ out_close: -+ os_close_file(fd); -+ return(ret); -+} -+ -+static int check_cpu_flag(char *feature, int *have_it) -+{ -+ char buf[MAXTOKEN], c; -+ int fd, len = sizeof(buf)/sizeof(buf[0]); -+ -+ printk("Checking for host processor %s support...", feature); -+ fd = os_open_file("/proc/cpuinfo", of_read(OPENFLAGS()), 0); -+ if(fd < 0){ -+ 
printk("Couldn't open /proc/cpuinfo, err = %d\n", -fd); -+ return(0); -+ } -+ -+ *have_it = 0; -+ if(!find_cpuinfo_line(fd, "flags", buf, sizeof(buf) / sizeof(buf[0]))) -+ goto out; - - c = token(fd, buf, len - 1, ' '); - if(c < 0) goto out; - else if(c != ' '){ -- printk("Failed to find ':' in /proc/cpuinfo\n"); -+ printk("Failed to find ' ' in /proc/cpuinfo\n"); - goto out; - } - -@@ -100,21 +137,48 @@ - out: - if(*have_it == 0) printk("No\n"); - else if(*have_it == 1) printk("Yes\n"); -- close(fd); -+ os_close_file(fd); - return(1); - } - -+#if 0 /* This doesn't work in tt mode, plus it's causing compilation problems -+ * for some people. -+ */ -+static void disable_lcall(void) -+{ -+ struct modify_ldt_ldt_s ldt; -+ int err; -+ -+ bzero(&ldt, sizeof(ldt)); -+ ldt.entry_number = 7; -+ ldt.base_addr = 0; -+ ldt.limit = 0; -+ err = modify_ldt(1, &ldt, sizeof(ldt)); -+ if(err) -+ printk("Failed to disable lcall7 - errno = %d\n", errno); -+} -+#endif -+ -+void arch_init_thread(void) -+{ -+#if 0 -+ disable_lcall(); -+#endif -+} -+ - void arch_check_bugs(void) - { - int have_it; - -- if(access("/proc/cpuinfo", R_OK)){ -+ if(os_access("/proc/cpuinfo", OS_ACC_R_OK) < 0){ - printk("/proc/cpuinfo not available - skipping CPU capability " - "checks\n"); - return; - } -- if(check_cpu_feature("cmov", &have_it)) cpu_has_cmov = have_it; -- if(check_cpu_feature("xmm", &have_it)) cpu_has_xmm = have_it; -+ if(check_cpu_flag("cmov", &have_it)) -+ host_has_cmov = have_it; -+ if(check_cpu_flag("xmm", &have_it)) -+ host_has_xmm = have_it; - } - - int arch_handle_signal(int sig, union uml_pt_regs *regs) -@@ -130,18 +194,18 @@ - if((*((char *) ip) != 0x0f) || ((*((char *) (ip + 1)) & 0xf0) != 0x40)) - return(0); - -- if(cpu_has_cmov == 0) -+ if(host_has_cmov == 0) - panic("SIGILL caused by cmov, which this processor doesn't " - "implement, boot a filesystem compiled for older " - "processors"); -- else if(cpu_has_cmov == 1) -+ else if(host_has_cmov == 1) - panic("SIGILL caused by cmov, 
which this processor claims to " - "implement"); -- else if(cpu_has_cmov == -1) -+ else if(host_has_cmov == -1) - panic("SIGILL caused by cmov, couldn't tell if this processor " - "implements it, boot a filesystem compiled for older " - "processors"); -- else panic("Bad value for cpu_has_cmov (%d)", cpu_has_cmov); -+ else panic("Bad value for host_has_cmov (%d)", host_has_cmov); - return(0); - } - -Index: uml-2.6.7/arch/um/kernel/skas/include/mode.h -=================================================================== ---- uml-2.6.7.orig/arch/um/kernel/skas/include/mode.h 2004-07-16 19:36:48.253810768 +0300 -+++ uml-2.6.7/arch/um/kernel/skas/include/mode.h 2004-07-16 19:47:23.727204128 +0300 -@@ -12,14 +12,16 @@ - extern int have_fpx_regs; - - extern void user_time_init_skas(void); --extern int copy_sc_from_user_skas(union uml_pt_regs *regs, void *from_ptr); --extern int copy_sc_to_user_skas(void *to_ptr, void *fp, -+extern int copy_sc_from_user_skas(int pid, union uml_pt_regs *regs, -+ void *from_ptr); -+extern int copy_sc_to_user_skas(int pid, void *to_ptr, void *fp, - union uml_pt_regs *regs, - unsigned long fault_addr, int fault_type); - extern void sig_handler_common_skas(int sig, void *sc_ptr); - extern void halt_skas(void); - extern void reboot_skas(void); - extern void kill_off_processes_skas(void); -+extern int is_skas_winch(int pid, int fd, void *data); - - #endif - -Index: uml-2.6.7/arch/um/include/sysdep-i386/sigcontext.h -=================================================================== ---- uml-2.6.7.orig/arch/um/include/sysdep-i386/sigcontext.h 2004-07-16 19:37:08.763692792 +0300 -+++ uml-2.6.7/arch/um/include/sysdep-i386/sigcontext.h 2004-07-16 19:47:23.707207168 +0300 -@@ -28,8 +28,8 @@ - */ - #define SC_START_SYSCALL(sc) do SC_EAX(sc) = -ENOSYS; while(0) - --/* These are General Protection and Page Fault */ --#define SEGV_IS_FIXABLE(trap) ((trap == 13) || (trap == 14)) -+/* This is Page Fault */ -+#define SEGV_IS_FIXABLE(trap) (trap == 14) - - 
#define SC_SEGV_IS_FIXABLE(sc) (SEGV_IS_FIXABLE(SC_TRAPNO(sc))) - -Index: uml-2.6.7/arch/um/drivers/net_user.c -=================================================================== ---- uml-2.6.7.orig/arch/um/drivers/net_user.c 2004-07-16 19:36:10.505549376 +0300 -+++ uml-2.6.7/arch/um/drivers/net_user.c 2004-07-16 19:47:24.796041640 +0300 -@@ -26,8 +26,7 @@ - if(gate_addr == NULL) return(0); - if(sscanf(gate_addr, "%d.%d.%d.%d", &tap_addr[0], - &tap_addr[1], &tap_addr[2], &tap_addr[3]) != 4){ -- printk("Invalid tap IP address - '%s'\n", -- gate_addr); -+ printk("Invalid tap IP address - '%s'\n", gate_addr); - return(-EINVAL); - } - return(0); -@@ -60,18 +59,18 @@ - } - - *output = '\0'; -- if(read(fd, &remain, sizeof(remain)) != sizeof(remain)){ -- printk("read_output - read of length failed, errno = %d\n", -- errno); -+ n = os_read_file(fd, &remain, sizeof(remain)); -+ if(n != sizeof(remain)){ -+ printk("read_output - read of length failed, err = %d\n", -n); - return; - } - - while(remain != 0){ - n = (remain < len) ? 
remain : len; -- actual = read(fd, output, n); -+ actual = os_read_file(fd, output, n); - if(actual != n){ - printk("read_output - read of data failed, " -- "errno = %d\n", errno); -+ "err = %d\n", -actual); - return; - } - remain -= actual; -@@ -83,13 +82,12 @@ - { - int n; - -- while(((n = read(fd, buf, len)) < 0) && (errno == EINTR)) ; -+ n = os_read_file(fd, buf, len); - -- if(n < 0){ -- if(errno == EAGAIN) return(0); -- return(-errno); -- } -- else if(n == 0) return(-ENOTCONN); -+ if(n == -EAGAIN) -+ return(0); -+ else if(n == 0) -+ return(-ENOTCONN); - return(n); - } - -@@ -112,13 +110,13 @@ - { - int n; - -- while(((n = write(fd, buf, len)) < 0) && (errno == EINTR)) ; -- if(n < 0){ -- if(errno == EAGAIN) return(0); -- return(-errno); -- } -- else if(n == 0) return(-ENOTCONN); -- return(n); -+ n = os_write_file(fd, buf, len); -+ -+ if(n == -EAGAIN) -+ return(0); -+ else if(n == 0) -+ return(-ENOTCONN); -+ return(n); - } - - int net_send(int fd, void *buf, int len) -@@ -157,7 +155,7 @@ - { - struct change_pre_exec_data *data = arg; - -- close(data->close_me); -+ os_close_file(data->close_me); - dup2(data->stdout, 1); - } - -@@ -167,17 +165,18 @@ - struct change_pre_exec_data pe_data; - - err = os_pipe(fds, 1, 0); -- if(err){ -- printk("change_tramp - pipe failed, errno = %d\n", -err); -+ if(err < 0){ -+ printk("change_tramp - pipe failed, err = %d\n", -err); - return(err); - } - pe_data.close_me = fds[0]; - pe_data.stdout = fds[1]; - pid = run_helper(change_pre_exec, &pe_data, argv, NULL); - -- close(fds[1]); -+ os_close_file(fds[1]); - read_output(fds[0], output, output_len); -- waitpid(pid, NULL, 0); -+ -+ CATCH_EINTR(err = waitpid(pid, NULL, 0)); - return(pid); - } - -Index: uml-2.6.7/include/asm-um/processor-generic.h -=================================================================== ---- uml-2.6.7.orig/include/asm-um/processor-generic.h 2004-07-16 19:36:07.621987744 +0300 -+++ uml-2.6.7/include/asm-um/processor-generic.h 2004-07-16 19:47:23.794193944 
+0300 -@@ -11,33 +11,14 @@ - struct task_struct; - - #include "linux/config.h" --#include "linux/signal.h" - #include "asm/ptrace.h" --#include "asm/siginfo.h" - #include "choose-mode.h" - - struct mm_struct; - - #define current_text_addr() ((void *) 0) - --#define cpu_relax() do ; while (0) -- --#ifdef CONFIG_MODE_TT --struct proc_tt_mode { -- int extern_pid; -- int tracing; -- int switch_pipe[2]; -- int singlestep_syscall; -- int vm_seq; --}; --#endif -- --#ifdef CONFIG_MODE_SKAS --struct proc_skas_mode { -- void *switch_buf; -- void *fork_buf; --}; --#endif -+#define cpu_relax() barrier() - - struct thread_struct { - int forking; -@@ -46,6 +27,7 @@ - struct pt_regs regs; - unsigned long cr2; - int err; -+ unsigned long trap_no; - void *fault_addr; - void *fault_catcher; - struct task_struct *prev_sched; -@@ -54,10 +36,20 @@ - struct arch_thread arch; - union { - #ifdef CONFIG_MODE_TT -- struct proc_tt_mode tt; -+ struct { -+ int extern_pid; -+ int tracing; -+ int switch_pipe[2]; -+ int singlestep_syscall; -+ int vm_seq; -+ } tt; - #endif - #ifdef CONFIG_MODE_SKAS -- struct proc_skas_mode skas; -+ struct { -+ void *switch_buf; -+ void *fork_buf; -+ int mm_count; -+ } skas; - #endif - } mode; - struct { -@@ -99,14 +91,19 @@ - } mm_segment_t; - - extern struct task_struct *alloc_task_struct(void); --extern void free_task_struct(struct task_struct *task); - - extern void release_thread(struct task_struct *); - extern int kernel_thread(int (*fn)(void *), void * arg, unsigned long flags); - extern void dump_thread(struct pt_regs *regs, struct user *u); -+extern void prepare_to_copy(struct task_struct *tsk); - - extern unsigned long thread_saved_pc(struct task_struct *t); - -+static inline void mm_copy_segments(struct mm_struct *from_mm, -+ struct mm_struct *new_mm) -+{ -+} -+ - #define init_stack (init_thread_union.stack) - - /* -Index: uml-2.6.7/include/asm-um/fixmap.h -=================================================================== ---- 
uml-2.6.7.orig/include/asm-um/fixmap.h 2004-07-16 19:37:31.708204696 +0300 -+++ uml-2.6.7/include/asm-um/fixmap.h 2004-07-16 19:47:23.790194552 +0300 -@@ -34,6 +34,7 @@ - FIX_KMAP_BEGIN, /* reserved pte's for temporary kernel mappings */ - FIX_KMAP_END = FIX_KMAP_BEGIN+(KM_TYPE_NR*NR_CPUS)-1, - #endif -+ FIX_VSYSCALL, - __end_of_fixed_addresses - }; - -@@ -63,6 +64,13 @@ - #define __fix_to_virt(x) (FIXADDR_TOP - ((x) << PAGE_SHIFT)) - #define __virt_to_fix(x) ((FIXADDR_TOP - ((x)&PAGE_MASK)) >> PAGE_SHIFT) - -+/* -+ * This is the range that is readable by user mode, and things -+ * acting like user mode such as get_user_pages. -+ */ -+#define FIXADDR_USER_START (__fix_to_virt(FIX_VSYSCALL)) -+#define FIXADDR_USER_END (FIXADDR_USER_START + PAGE_SIZE) -+ - extern void __this_fixmap_does_not_exist(void); - - /* -Index: uml-2.6.7/arch/um/os-Linux/drivers/ethertap_kern.c -=================================================================== ---- uml-2.6.7.orig/arch/um/os-Linux/drivers/ethertap_kern.c 2004-07-16 19:35:55.747792896 +0300 -+++ uml-2.6.7/arch/um/os-Linux/drivers/ethertap_kern.c 2004-07-16 19:47:23.765198352 +0300 -@@ -8,7 +8,6 @@ - #include "linux/init.h" - #include "linux/netdevice.h" - #include "linux/etherdevice.h" --#include "linux/init.h" - #include "net_kern.h" - #include "net_user.h" - #include "etap.h" -Index: uml-2.6.7/arch/um/kernel/tt/syscall_user.c -=================================================================== ---- uml-2.6.7.orig/arch/um/kernel/tt/syscall_user.c 2004-07-16 19:37:02.608628504 +0300 -+++ uml-2.6.7/arch/um/kernel/tt/syscall_user.c 2004-07-16 19:47:23.749200784 +0300 -@@ -33,7 +33,7 @@ - SC_START_SYSCALL(sc); - - index = record_syscall_start(syscall); -- syscall_trace(); -+ syscall_trace(regs, 1); - result = execute_syscall(regs); - - /* regs->sc may have changed while the system call ran (there may -@@ -46,7 +46,7 @@ - (result == -ERESTARTNOINTR)) - do_signal(result); - -- syscall_trace(); -+ syscall_trace(regs, 0); - 
record_syscall_end(index, result); - } - -Index: uml-2.6.7/arch/um/drivers/ubd_kern.c -=================================================================== ---- uml-2.6.7.orig/arch/um/drivers/ubd_kern.c 2004-07-16 19:36:39.996066136 +0300 -+++ uml-2.6.7/arch/um/drivers/ubd_kern.c 2004-07-16 19:47:23.697208688 +0300 -@@ -8,6 +8,13 @@ - * old style ubd by setting UBD_SHIFT to 0 - * 2002-09-27...2002-10-18 massive tinkering for 2.5 - * partitions have changed in 2.5 -+ * 2003-01-29 more tinkering for 2.5.59-1 -+ * This should now address the sysfs problems and has -+ * the symlink for devfs to allow for booting with -+ * the common /dev/ubd/discX/... names rather than -+ * only /dev/ubdN/discN this version also has lots of -+ * clean ups preparing for ubd-many. -+ * James McMechan - */ - - #define MAJOR_NR UBD_MAJOR -@@ -40,9 +47,12 @@ - #include "mconsole_kern.h" - #include "init.h" - #include "irq_user.h" -+#include "irq_kern.h" - #include "ubd_user.h" - #include "2_5compat.h" - #include "os.h" -+#include "mem.h" -+#include "mem_kern.h" - - static spinlock_t ubd_io_lock = SPIN_LOCK_UNLOCKED; - static spinlock_t ubd_lock = SPIN_LOCK_UNLOCKED; -@@ -56,6 +66,10 @@ - - #define MAX_DEV (8) - -+/* Changed in early boot */ -+static int ubd_do_mmap = 0; -+#define UBD_MMAP_BLOCK_SIZE PAGE_SIZE -+ - static struct block_device_operations ubd_blops = { - .owner = THIS_MODULE, - .open = ubd_open, -@@ -67,7 +81,7 @@ - static request_queue_t *ubd_queue; - - /* Protected by ubd_lock */ --static int fake_major = 0; -+static int fake_major = MAJOR_NR; - - static struct gendisk *ubd_gendisk[MAX_DEV]; - static struct gendisk *fake_gendisk[MAX_DEV]; -@@ -96,13 +110,19 @@ - - struct ubd { - char *file; -- int is_dir; - int count; - int fd; - __u64 size; - struct openflags boot_openflags; - struct openflags openflags; -+ int no_cow; - struct cow cow; -+ -+ int map_writes; -+ int map_reads; -+ int nomap_writes; -+ int nomap_reads; -+ int write_maps; - }; - - #define DEFAULT_COW { \ -@@ 
-115,21 +135,28 @@ - - #define DEFAULT_UBD { \ - .file = NULL, \ -- .is_dir = 0, \ - .count = 0, \ - .fd = -1, \ - .size = -1, \ - .boot_openflags = OPEN_FLAGS, \ - .openflags = OPEN_FLAGS, \ -+ .no_cow = 0, \ - .cow = DEFAULT_COW, \ -+ .map_writes = 0, \ -+ .map_reads = 0, \ -+ .nomap_writes = 0, \ -+ .nomap_reads = 0, \ -+ .write_maps = 0, \ - } - - struct ubd ubd_dev[MAX_DEV] = { [ 0 ... MAX_DEV - 1 ] = DEFAULT_UBD }; - - static int ubd0_init(void) - { -- if(ubd_dev[0].file == NULL) -- ubd_dev[0].file = "root_fs"; -+ struct ubd *dev = &ubd_dev[0]; -+ -+ if(dev->file == NULL) -+ dev->file = "root_fs"; - return(0); - } - -@@ -196,19 +223,46 @@ - " Create ide0 entries that map onto ubd devices.\n\n" - ); - -+static int parse_unit(char **ptr) -+{ -+ char *str = *ptr, *end; -+ int n = -1; -+ -+ if(isdigit(*str)) { -+ n = simple_strtoul(str, &end, 0); -+ if(end == str) -+ return(-1); -+ *ptr = end; -+ } -+ else if (('a' <= *str) && (*str <= 'h')) { -+ n = *str - 'a'; -+ str++; -+ *ptr = str; -+ } -+ return(n); -+} -+ - static int ubd_setup_common(char *str, int *index_out) - { -+ struct ubd *dev; - struct openflags flags = global_openflags; - char *backing_file; - int n, err; - - if(index_out) *index_out = -1; -- n = *str++; -+ n = *str; - if(n == '='){ -- static int fake_major_allowed = 1; - char *end; - int major; - -+ str++; -+ if(!strcmp(str, "mmap")){ -+ CHOOSE_MODE(printk("mmap not supported by the ubd " -+ "driver in tt mode\n"), -+ ubd_do_mmap = 1); -+ return(0); -+ } -+ - if(!strcmp(str, "sync")){ - global_openflags.s = 1; - return(0); -@@ -220,20 +274,14 @@ - return(1); - } - -- if(!fake_major_allowed){ -- printk(KERN_ERR "Can't assign a fake major twice\n"); -- return(1); -- } -- - err = 1; - spin_lock(&ubd_lock); -- if(!fake_major_allowed){ -+ if(fake_major != MAJOR_NR){ - printk(KERN_ERR "Can't assign a fake major twice\n"); - goto out1; - } - - fake_major = major; -- fake_major_allowed = 0; - - printk(KERN_INFO "Setting extra ubd major number to %d\n", - 
major); -@@ -243,25 +291,23 @@ - return(err); - } - -- if(n < '0'){ -- printk(KERN_ERR "ubd_setup : index out of range\n"); } -- -- if((n >= '0') && (n <= '9')) n -= '0'; -- else if((n >= 'a') && (n <= 'z')) n -= 'a'; -- else { -- printk(KERN_ERR "ubd_setup : device syntax invalid\n"); -+ n = parse_unit(&str); -+ if(n < 0){ -+ printk(KERN_ERR "ubd_setup : couldn't parse unit number " -+ "'%s'\n", str); - return(1); - } - if(n >= MAX_DEV){ -- printk(KERN_ERR "ubd_setup : index out of range " -- "(%d devices)\n", MAX_DEV); -+ printk(KERN_ERR "ubd_setup : index %d out of range " -+ "(%d devices)\n", n, MAX_DEV); - return(1); - } - - err = 1; - spin_lock(&ubd_lock); - -- if(ubd_dev[n].file != NULL){ -+ dev = &ubd_dev[n]; -+ if(dev->file != NULL){ - printk(KERN_ERR "ubd_setup : device already configured\n"); - goto out2; - } -@@ -276,6 +322,11 @@ - flags.s = 1; - str++; - } -+ if (*str == 'd'){ -+ dev->no_cow = 1; -+ str++; -+ } -+ - if(*str++ != '='){ - printk(KERN_ERR "ubd_setup : Expected '='\n"); - goto out2; -@@ -284,14 +335,17 @@ - err = 0; - backing_file = strchr(str, ','); - if(backing_file){ -- *backing_file = '\0'; -- backing_file++; -+ if(dev->no_cow) -+ printk(KERN_ERR "Can't specify both 'd' and a " -+ "cow file\n"); -+ else { -+ *backing_file = '\0'; -+ backing_file++; -+ } - } -- ubd_dev[n].file = str; -- if(ubd_is_dir(ubd_dev[n].file)) -- ubd_dev[n].is_dir = 1; -- ubd_dev[n].cow.file = backing_file; -- ubd_dev[n].boot_openflags = flags; -+ dev->file = str; -+ dev->cow.file = backing_file; -+ dev->boot_openflags = flags; - out2: - spin_unlock(&ubd_lock); - return(err); -@@ -321,8 +375,7 @@ - static int fakehd_set = 0; - static int fakehd(char *str) - { -- printk(KERN_INFO -- "fakehd : Changing ubd name to \"hd\".\n"); -+ printk(KERN_INFO "fakehd : Changing ubd name to \"hd\".\n"); - fakehd_set = 1; - return 1; - } -@@ -368,32 +421,42 @@ - { - struct io_thread_req req; - struct request *rq = elv_next_request(ubd_queue); -- int n; -+ int n, err; - - do_ubd 
= NULL; - intr_count++; - n = read_ubd_fs(thread_fd, &req, sizeof(req)); - if(n != sizeof(req)){ - printk(KERN_ERR "Pid %d - spurious interrupt in ubd_handler, " -- "errno = %d\n", os_getpid(), -n); -+ "err = %d\n", os_getpid(), -n); - spin_lock(&ubd_io_lock); - end_request(rq, 0); - spin_unlock(&ubd_io_lock); - return; - } - -- if((req.offset != ((__u64) (rq->sector)) << 9) || -- (req.length != (rq->current_nr_sectors) << 9)) -+ if((req.op != UBD_MMAP) && -+ ((req.offset != ((__u64) (rq->sector)) << 9) || -+ (req.length != (rq->current_nr_sectors) << 9))) - panic("I/O op mismatch"); - -+ if(req.map_fd != -1){ -+ err = physmem_subst_mapping(req.buffer, req.map_fd, -+ req.map_offset, 1); -+ if(err) -+ printk("ubd_handler - physmem_subst_mapping failed, " -+ "err = %d\n", -err); -+ } -+ - ubd_finish(rq, req.error); - reactivate_fd(thread_fd, UBD_IRQ); - do_ubd_request(ubd_queue); - } - --static void ubd_intr(int irq, void *dev, struct pt_regs *unused) -+static irqreturn_t ubd_intr(int irq, void *dev, struct pt_regs *unused) - { - ubd_handler(); -+ return(IRQ_HANDLED); - } - - /* Only changed by ubd_init, which is an initcall. */ -@@ -417,10 +480,14 @@ - - static void ubd_close(struct ubd *dev) - { -+ if(ubd_do_mmap) -+ physmem_forget_descriptor(dev->fd); - os_close_file(dev->fd); - if(dev->cow.file == NULL) - return; - -+ if(ubd_do_mmap) -+ physmem_forget_descriptor(dev->cow.fd); - os_close_file(dev->cow.fd); - vfree(dev->cow.bitmap); - dev->cow.bitmap = NULL; -@@ -429,18 +496,20 @@ - static int ubd_open_dev(struct ubd *dev) - { - struct openflags flags; -- int err, n, create_cow, *create_ptr; -+ char **back_ptr; -+ int err, create_cow, *create_ptr; - -+ dev->openflags = dev->boot_openflags; - create_cow = 0; - create_ptr = (dev->cow.file != NULL) ? &create_cow : NULL; -- dev->fd = open_ubd_file(dev->file, &dev->openflags, &dev->cow.file, -+ back_ptr = dev->no_cow ? 
NULL : &dev->cow.file; -+ dev->fd = open_ubd_file(dev->file, &dev->openflags, back_ptr, - &dev->cow.bitmap_offset, &dev->cow.bitmap_len, - &dev->cow.data_offset, create_ptr); - - if((dev->fd == -ENOENT) && create_cow){ -- n = dev - ubd_dev; - dev->fd = create_cow_file(dev->file, dev->cow.file, -- dev->openflags, 1 << 9, -+ dev->openflags, 1 << 9, PAGE_SIZE, - &dev->cow.bitmap_offset, - &dev->cow.bitmap_len, - &dev->cow.data_offset); -@@ -455,13 +524,17 @@ - if(dev->cow.file != NULL){ - err = -ENOMEM; - dev->cow.bitmap = (void *) vmalloc(dev->cow.bitmap_len); -- if(dev->cow.bitmap == NULL) goto error; -+ if(dev->cow.bitmap == NULL){ -+ printk(KERN_ERR "Failed to vmalloc COW bitmap\n"); -+ goto error; -+ } - flush_tlb_kernel_vm(); - - err = read_cow_bitmap(dev->fd, dev->cow.bitmap, - dev->cow.bitmap_offset, - dev->cow.bitmap_len); -- if(err) goto error; -+ if(err < 0) -+ goto error; - - flags = dev->openflags; - flags.w = 0; -@@ -481,17 +554,31 @@ - - { - struct gendisk *disk; -+ char from[sizeof("ubd/nnnnn\0")], to[sizeof("discnnnnn/disc\0")]; -+ int err; - - disk = alloc_disk(1 << UBD_SHIFT); -- if (!disk) -- return -ENOMEM; -+ if(disk == NULL) -+ return(-ENOMEM); - - disk->major = major; - disk->first_minor = unit << UBD_SHIFT; - disk->fops = &ubd_blops; - set_capacity(disk, size / 512); -- sprintf(disk->disk_name, "ubd"); -- sprintf(disk->devfs_name, "ubd/disc%d", unit); -+ if(major == MAJOR_NR){ -+ sprintf(disk->disk_name, "ubd%c", 'a' + unit); -+ sprintf(disk->devfs_name, "ubd/disc%d", unit); -+ sprintf(from, "ubd/%d", unit); -+ sprintf(to, "disc%d/disc", unit); -+ err = devfs_mk_symlink(from, to); -+ if(err) -+ printk("ubd_new_disk failed to make link from %s to " -+ "%s, error = %d\n", from, to, err); -+ } -+ else { -+ sprintf(disk->disk_name, "ubd_fake%d", unit); -+ sprintf(disk->devfs_name, "ubd_fake/disc%d", unit); -+ } - - disk->private_data = &ubd_dev[unit]; - disk->queue = ubd_queue; -@@ -506,24 +593,21 @@ - struct ubd *dev = &ubd_dev[n]; - int err; - 
-- if(dev->is_dir) -- return(-EISDIR); -- -- if (!dev->file) -+ if(dev->file == NULL) - return(-ENODEV); - - if (ubd_open_dev(dev)) - return(-ENODEV); - - err = ubd_file_size(dev, &dev->size); -- if(err) -+ if(err < 0) - return(err); - - err = ubd_new_disk(MAJOR_NR, dev->size, n, &ubd_gendisk[n]); - if(err) - return(err); - -- if(fake_major) -+ if(fake_major != MAJOR_NR) - ubd_new_disk(fake_major, dev->size, n, - &fake_gendisk[n]); - -@@ -561,42 +645,42 @@ - return(err); - } - --static int ubd_get_config(char *dev, char *str, int size, char **error_out) -+static int ubd_get_config(char *name, char *str, int size, char **error_out) - { -- struct ubd *ubd; -+ struct ubd *dev; - char *end; -- int major, n = 0; -+ int n, len = 0; - -- major = simple_strtoul(dev, &end, 0); -- if((*end != '\0') || (end == dev)){ -- *error_out = "ubd_get_config : didn't parse major number"; -+ n = simple_strtoul(name, &end, 0); -+ if((*end != '\0') || (end == name)){ -+ *error_out = "ubd_get_config : didn't parse device number"; - return(-1); - } - -- if((major >= MAX_DEV) || (major < 0)){ -- *error_out = "ubd_get_config : major number out of range"; -+ if((n >= MAX_DEV) || (n < 0)){ -+ *error_out = "ubd_get_config : device number out of range"; - return(-1); - } - -- ubd = &ubd_dev[major]; -+ dev = &ubd_dev[n]; - spin_lock(&ubd_lock); - -- if(ubd->file == NULL){ -- CONFIG_CHUNK(str, size, n, "", 1); -+ if(dev->file == NULL){ -+ CONFIG_CHUNK(str, size, len, "", 1); - goto out; - } - -- CONFIG_CHUNK(str, size, n, ubd->file, 0); -+ CONFIG_CHUNK(str, size, len, dev->file, 0); - -- if(ubd->cow.file != NULL){ -- CONFIG_CHUNK(str, size, n, ",", 0); -- CONFIG_CHUNK(str, size, n, ubd->cow.file, 1); -+ if(dev->cow.file != NULL){ -+ CONFIG_CHUNK(str, size, len, ",", 0); -+ CONFIG_CHUNK(str, size, len, dev->cow.file, 1); - } -- else CONFIG_CHUNK(str, size, n, "", 1); -+ else CONFIG_CHUNK(str, size, len, "", 1); - - out: - spin_unlock(&ubd_lock); -- return(n); -+ return(len); - } - - static int 
ubd_remove(char *str) -@@ -604,11 +688,9 @@ - struct ubd *dev; - int n, err = -ENODEV; - -- if(!isdigit(*str)) -- return(err); /* it should be a number 0-7/a-h */ -+ n = parse_unit(&str); - -- n = *str - '0'; -- if(n >= MAX_DEV) -+ if((n < 0) || (n >= MAX_DEV)) - return(err); - - dev = &ubd_dev[n]; -@@ -669,7 +751,7 @@ - - elevator_init(ubd_queue, &elevator_noop); - -- if (fake_major != 0) { -+ if (fake_major != MAJOR_NR) { - char name[sizeof("ubd_nnn\0")]; - - snprintf(name, sizeof(name), "ubd_%d", fake_major); -@@ -696,6 +778,7 @@ - io_pid = start_io_thread(stack + PAGE_SIZE - sizeof(void *), - &thread_fd); - if(io_pid < 0){ -+ io_pid = -1; - printk(KERN_ERR - "ubd : Failed to start I/O thread (errno = %d) - " - "falling back to synchronous I/O\n", -io_pid); -@@ -703,8 +786,8 @@ - } - err = um_request_irq(UBD_IRQ, thread_fd, IRQ_READ, ubd_intr, - SA_INTERRUPT, "ubd", ubd_dev); -- if(err != 0) printk(KERN_ERR -- "um_request_irq failed - errno = %d\n", -err); -+ if(err != 0) -+ printk(KERN_ERR "um_request_irq failed - errno = %d\n", -err); - return(err); - } - -@@ -714,15 +797,9 @@ - { - struct gendisk *disk = inode->i_bdev->bd_disk; - struct ubd *dev = disk->private_data; -- int err = -EISDIR; -- -- if(dev->is_dir == 1) -- goto out; -+ int err = 0; - -- err = 0; - if(dev->count == 0){ -- dev->openflags = dev->boot_openflags; -- - err = ubd_open_dev(dev); - if(err){ - printk(KERN_ERR "%s: Can't open \"%s\": errno = %d\n", -@@ -749,62 +826,156 @@ - return(0); - } - --void cowify_req(struct io_thread_req *req, struct ubd *dev) -+static void cowify_bitmap(__u64 io_offset, int length, unsigned long *cow_mask, -+ __u64 *cow_offset, unsigned long *bitmap, -+ __u64 bitmap_offset, unsigned long *bitmap_words, -+ __u64 bitmap_len) -+{ -+ __u64 sector = io_offset >> 9; -+ int i, update_bitmap = 0; -+ -+ for(i = 0; i < length >> 9; i++){ -+ if(cow_mask != NULL) -+ ubd_set_bit(i, (unsigned char *) cow_mask); -+ if(ubd_test_bit(sector + i, (unsigned char *) bitmap)) -+ 
continue; -+ -+ update_bitmap = 1; -+ ubd_set_bit(sector + i, (unsigned char *) bitmap); -+ } -+ -+ if(!update_bitmap) -+ return; -+ -+ *cow_offset = sector / (sizeof(unsigned long) * 8); -+ -+ /* This takes care of the case where we're exactly at the end of the -+ * device, and *cow_offset + 1 is off the end. So, just back it up -+ * by one word. Thanks to Lynn Kerby for the fix and James McMechan -+ * for the original diagnosis. -+ */ -+ if(*cow_offset == ((bitmap_len + sizeof(unsigned long) - 1) / -+ sizeof(unsigned long) - 1)) -+ (*cow_offset)--; -+ -+ bitmap_words[0] = bitmap[*cow_offset]; -+ bitmap_words[1] = bitmap[*cow_offset + 1]; -+ -+ *cow_offset *= sizeof(unsigned long); -+ *cow_offset += bitmap_offset; -+} -+ -+static void cowify_req(struct io_thread_req *req, unsigned long *bitmap, -+ __u64 bitmap_offset, __u64 bitmap_len) - { -- int i, update_bitmap, sector = req->offset >> 9; -+ __u64 sector = req->offset >> 9; -+ int i; - - if(req->length > (sizeof(req->sector_mask) * 8) << 9) - panic("Operation too long"); -+ - if(req->op == UBD_READ) { - for(i = 0; i < req->length >> 9; i++){ -- if(ubd_test_bit(sector + i, (unsigned char *) -- dev->cow.bitmap)){ -+ if(ubd_test_bit(sector + i, (unsigned char *) bitmap)) - ubd_set_bit(i, (unsigned char *) - &req->sector_mask); -- } - } -- } -- else { -- update_bitmap = 0; -- for(i = 0; i < req->length >> 9; i++){ -- ubd_set_bit(i, (unsigned char *) -- &req->sector_mask); -- if(!ubd_test_bit(sector + i, (unsigned char *) -- dev->cow.bitmap)) -- update_bitmap = 1; -- ubd_set_bit(sector + i, (unsigned char *) -- dev->cow.bitmap); -- } -- if(update_bitmap){ -- req->cow_offset = sector / (sizeof(unsigned long) * 8); -- req->bitmap_words[0] = -- dev->cow.bitmap[req->cow_offset]; -- req->bitmap_words[1] = -- dev->cow.bitmap[req->cow_offset + 1]; -- req->cow_offset *= sizeof(unsigned long); -- req->cow_offset += dev->cow.bitmap_offset; -+ } -+ else cowify_bitmap(req->offset, req->length, &req->sector_mask, -+ 
&req->cow_offset, bitmap, bitmap_offset, -+ req->bitmap_words, bitmap_len); -+} -+ -+static int mmap_fd(struct request *req, struct ubd *dev, __u64 offset) -+{ -+ __u64 sector; -+ unsigned char *bitmap; -+ int bit, i; -+ -+ /* mmap must have been requested on the command line */ -+ if(!ubd_do_mmap) -+ return(-1); -+ -+ /* The buffer must be page aligned */ -+ if(((unsigned long) req->buffer % UBD_MMAP_BLOCK_SIZE) != 0) -+ return(-1); -+ -+ /* The request must be a page long */ -+ if((req->current_nr_sectors << 9) != PAGE_SIZE) -+ return(-1); -+ -+ if(dev->cow.file == NULL) -+ return(dev->fd); -+ -+ sector = offset >> 9; -+ bitmap = (unsigned char *) dev->cow.bitmap; -+ bit = ubd_test_bit(sector, bitmap); -+ -+ for(i = 1; i < req->current_nr_sectors; i++){ -+ if(ubd_test_bit(sector + i, bitmap) != bit) -+ return(-1); -+ } -+ -+ if(bit || (rq_data_dir(req) == WRITE)) -+ offset += dev->cow.data_offset; -+ -+ /* The data on disk must be page aligned */ -+ if((offset % UBD_MMAP_BLOCK_SIZE) != 0) -+ return(-1); -+ -+ return(bit ? 
dev->fd : dev->cow.fd); -+} -+ -+static int prepare_mmap_request(struct ubd *dev, int fd, __u64 offset, -+ struct request *req, -+ struct io_thread_req *io_req) -+{ -+ int err; -+ -+ if(rq_data_dir(req) == WRITE){ -+ /* Writes are almost no-ops since the new data is already in the -+ * host page cache -+ */ -+ dev->map_writes++; -+ if(dev->cow.file != NULL) -+ cowify_bitmap(io_req->offset, io_req->length, -+ &io_req->sector_mask, &io_req->cow_offset, -+ dev->cow.bitmap, dev->cow.bitmap_offset, -+ io_req->bitmap_words, -+ dev->cow.bitmap_len); -+ } -+ else { -+ int w; -+ -+ if((dev->cow.file != NULL) && (fd == dev->cow.fd)) -+ w = 0; -+ else w = dev->openflags.w; -+ -+ if((dev->cow.file != NULL) && (fd == dev->fd)) -+ offset += dev->cow.data_offset; -+ -+ err = physmem_subst_mapping(req->buffer, fd, offset, w); -+ if(err){ -+ printk("physmem_subst_mapping failed, err = %d\n", -+ -err); -+ return(1); - } -+ dev->map_reads++; - } -+ io_req->op = UBD_MMAP; -+ io_req->buffer = req->buffer; -+ return(0); - } - - static int prepare_request(struct request *req, struct io_thread_req *io_req) - { - struct gendisk *disk = req->rq_disk; - struct ubd *dev = disk->private_data; -- __u64 block; -- int nsect; -+ __u64 offset; -+ int len, fd; - - if(req->rq_status == RQ_INACTIVE) return(1); - -- if(dev->is_dir){ -- strcpy(req->buffer, "HOSTFS:"); -- strcat(req->buffer, dev->file); -- spin_lock(&ubd_io_lock); -- end_request(req, 1); -- spin_unlock(&ubd_io_lock); -- return(1); -- } -- - if((rq_data_dir(req) == WRITE) && !dev->openflags.w){ - printk("Write attempted on readonly ubd device %s\n", - disk->disk_name); -@@ -814,23 +985,49 @@ - return(1); - } - -- block = req->sector; -- nsect = req->current_nr_sectors; -+ offset = ((__u64) req->sector) << 9; -+ len = req->current_nr_sectors << 9; - -- io_req->op = rq_data_dir(req) == READ ? UBD_READ : UBD_WRITE; - io_req->fds[0] = (dev->cow.file != NULL) ? 
dev->cow.fd : dev->fd; - io_req->fds[1] = dev->fd; -+ io_req->map_fd = -1; -+ io_req->cow_offset = -1; -+ io_req->offset = offset; -+ io_req->length = len; -+ io_req->error = 0; -+ io_req->sector_mask = 0; -+ -+ fd = mmap_fd(req, dev, io_req->offset); -+ if(fd > 0){ -+ /* If mmapping is otherwise OK, but the first access to the -+ * page is a write, then it's not mapped in yet. So we have -+ * to write the data to disk first, then we can map the disk -+ * page in and continue normally from there. -+ */ -+ if((rq_data_dir(req) == WRITE) && !is_remapped(req->buffer)){ -+ io_req->map_fd = dev->fd; -+ io_req->map_offset = io_req->offset + -+ dev->cow.data_offset; -+ dev->write_maps++; -+ } -+ else return(prepare_mmap_request(dev, fd, io_req->offset, req, -+ io_req)); -+ } -+ -+ if(rq_data_dir(req) == READ) -+ dev->nomap_reads++; -+ else dev->nomap_writes++; -+ -+ io_req->op = (rq_data_dir(req) == READ) ? UBD_READ : UBD_WRITE; - io_req->offsets[0] = 0; - io_req->offsets[1] = dev->cow.data_offset; -- io_req->offset = ((__u64) block) << 9; -- io_req->length = nsect << 9; - io_req->buffer = req->buffer; - io_req->sectorsize = 1 << 9; -- io_req->sector_mask = 0; -- io_req->cow_offset = -1; -- io_req->error = 0; - -- if(dev->cow.file != NULL) cowify_req(io_req, dev); -+ if(dev->cow.file != NULL) -+ cowify_req(io_req, dev->cow.bitmap, dev->cow.bitmap_offset, -+ dev->cow.bitmap_len); -+ - return(0); - } - -@@ -841,7 +1038,7 @@ - int err, n; - - if(thread_fd == -1){ -- while(!list_empty(&q->queue_head)){ -+ while(!elv_queue_empty(q)){ - req = elv_next_request(q); - err = prepare_request(req, &io_req); - if(!err){ -@@ -851,7 +1048,8 @@ - } - } - else { -- if(do_ubd || list_empty(&q->queue_head)) return; -+ if(do_ubd || elv_queue_empty(q)) -+ return; - req = elv_next_request(q); - err = prepare_request(req, &io_req); - if(!err){ -@@ -885,7 +1083,7 @@ - g.heads = 128; - g.sectors = 32; - g.cylinders = dev->size / (128 * 32 * 512); -- g.start = 2; -+ g.start = 
get_start_sect(inode->i_bdev); - return(copy_to_user(loc, &g, sizeof(g)) ? -EFAULT : 0); - - case HDIO_SET_UNMASKINTR: -@@ -935,6 +1133,142 @@ - return(-EINVAL); - } - -+static int ubd_check_remapped(int fd, unsigned long address, int is_write, -+ __u64 offset) -+{ -+ __u64 bitmap_offset; -+ unsigned long new_bitmap[2]; -+ int i, err, n; -+ -+ /* If it's not a write access, we can't do anything about it */ -+ if(!is_write) -+ return(0); -+ -+ /* We have a write */ -+ for(i = 0; i < sizeof(ubd_dev) / sizeof(ubd_dev[0]); i++){ -+ struct ubd *dev = &ubd_dev[i]; -+ -+ if((dev->fd != fd) && (dev->cow.fd != fd)) -+ continue; -+ -+ /* It's a write to a ubd device */ -+ -+ if(!dev->openflags.w){ -+ /* It's a write access on a read-only device - probably -+ * shouldn't happen. If the kernel is trying to change -+ * something with no intention of writing it back out, -+ * then this message will clue us in that this needs -+ * fixing -+ */ -+ printk("Write access to mapped page from readonly ubd " -+ "device %d\n", i); -+ return(0); -+ } -+ -+ /* It's a write to a writeable ubd device - it must be COWed -+ * because, otherwise, the page would have been mapped in -+ * writeable -+ */ -+ -+ if(!dev->cow.file) -+ panic("Write fault on writeable non-COW ubd device %d", -+ i); -+ -+ /* It should also be an access to the backing file since the -+ * COW pages should be mapped in read-write -+ */ -+ -+ if(fd == dev->fd) -+ panic("Write fault on a backing page of ubd " -+ "device %d\n", i); -+ -+ /* So, we do the write, copying the backing data to the COW -+ * file... -+ */ -+ -+ err = os_seek_file(dev->fd, offset + dev->cow.data_offset); -+ if(err < 0) -+ panic("Couldn't seek to %lld in COW file of ubd " -+ "device %d, err = %d", -+ offset + dev->cow.data_offset, i, -err); -+ -+ n = os_write_file(dev->fd, (void *) address, PAGE_SIZE); -+ if(n != PAGE_SIZE) -+ panic("Couldn't copy data to COW file of ubd " -+ "device %d, err = %d", i, -n); -+ -+ /* ... updating the COW bitmap... 
*/ -+ -+ cowify_bitmap(offset, PAGE_SIZE, NULL, &bitmap_offset, -+ dev->cow.bitmap, dev->cow.bitmap_offset, -+ new_bitmap, dev->cow.bitmap_len); -+ -+ err = os_seek_file(dev->fd, bitmap_offset); -+ if(err < 0) -+ panic("Couldn't seek to %lld in COW file of ubd " -+ "device %d, err = %d", bitmap_offset, i, -err); -+ -+ n = os_write_file(dev->fd, new_bitmap, sizeof(new_bitmap)); -+ if(n != sizeof(new_bitmap)) -+ panic("Couldn't update bitmap of ubd device %d, " -+ "err = %d", i, -n); -+ -+ /* Maybe we can map the COW page in, and maybe we can't. If -+ * it is a pre-V3 COW file, we can't, since the alignment will -+ * be wrong. If it is a V3 or later COW file which has been -+ * moved to a system with a larger page size, then maybe we -+ * can't, depending on the exact location of the page. -+ */ -+ -+ offset += dev->cow.data_offset; -+ -+ /* Remove the remapping, putting the original anonymous page -+ * back. If the COW file can be mapped in, that is done. -+ * Otherwise, the COW page is read in. 
-+ */ -+ -+ if(!physmem_remove_mapping((void *) address)) -+ panic("Address 0x%lx not remapped by ubd device %d", -+ address, i); -+ if((offset % UBD_MMAP_BLOCK_SIZE) == 0) -+ physmem_subst_mapping((void *) address, dev->fd, -+ offset, 1); -+ else { -+ err = os_seek_file(dev->fd, offset); -+ if(err < 0) -+ panic("Couldn't seek to %lld in COW file of " -+ "ubd device %d, err = %d", offset, i, -+ -err); -+ -+ n = os_read_file(dev->fd, (void *) address, PAGE_SIZE); -+ if(n != PAGE_SIZE) -+ panic("Failed to read page from offset %llx of " -+ "COW file of ubd device %d, err = %d", -+ offset, i, -n); -+ } -+ -+ return(1); -+ } -+ -+ /* It's not a write on a ubd device */ -+ return(0); -+} -+ -+static struct remapper ubd_remapper = { -+ .list = LIST_HEAD_INIT(ubd_remapper.list), -+ .proc = ubd_check_remapped, -+}; -+ -+static int ubd_remapper_setup(void) -+{ -+ if(ubd_do_mmap) -+ register_remapper(&ubd_remapper); -+ -+ return(0); -+} -+ -+__initcall(ubd_remapper_setup); -+ - /* - * Overrides for Emacs so that we follow Linus's tabbing style. 
- * Emacs will notice this stuff at the end of the file and automatically -Index: uml-2.6.7/arch/um/drivers/xterm.c -=================================================================== ---- uml-2.6.7.orig/arch/um/drivers/xterm.c 2004-07-16 19:35:55.930765080 +0300 -+++ uml-2.6.7/arch/um/drivers/xterm.c 2004-07-16 19:47:23.699208384 +0300 -@@ -8,7 +8,6 @@ - #include - #include - #include --#include - #include - #include - #include -@@ -36,7 +35,8 @@ - { - struct xterm_chan *data; - -- if((data = malloc(sizeof(*data))) == NULL) return(NULL); -+ data = malloc(sizeof(*data)); -+ if(data == NULL) return(NULL); - *data = ((struct xterm_chan) { .pid = -1, - .helper_pid = -1, - .device = device, -@@ -93,7 +93,7 @@ - "/usr/lib/uml/port-helper", "-uml-socket", - file, NULL }; - -- if(access(argv[4], X_OK)) -+ if(os_access(argv[4], OS_ACC_X_OK) < 0) - argv[4] = "port-helper"; - - fd = mkstemp(file); -@@ -106,13 +106,13 @@ - printk("xterm_open : unlink failed, errno = %d\n", errno); - return(-errno); - } -- close(fd); -+ os_close_file(fd); - -- fd = create_unix_socket(file, sizeof(file)); -+ fd = os_create_unix_socket(file, sizeof(file), 1); - if(fd < 0){ - printk("xterm_open : create_unix_socket failed, errno = %d\n", - -fd); -- return(-fd); -+ return(fd); - } - - sprintf(title, data->title, data->device); -@@ -128,15 +128,16 @@ - if(data->direct_rcv) - new = os_rcv_fd(fd, &data->helper_pid); - else { -- if((err = os_set_fd_block(fd, 0)) != 0){ -+ err = os_set_fd_block(fd, 0); -+ if(err < 0){ - printk("xterm_open : failed to set descriptor " -- "non-blocking, errno = %d\n", err); -+ "non-blocking, err = %d\n", -err); - return(err); - } - new = xterm_fd(fd, &data->helper_pid); - } - if(new < 0){ -- printk("xterm_open : os_rcv_fd failed, errno = %d\n", -new); -+ printk("xterm_open : os_rcv_fd failed, err = %d\n", -new); - goto out; - } - -@@ -160,7 +161,7 @@ - if(data->helper_pid != -1) - os_kill_process(data->helper_pid, 0); - data->helper_pid = -1; -- close(fd); -+ 
os_close_file(fd); - } - - void xterm_free(void *d) -Index: uml-2.6.7/arch/um/drivers/mconsole_kern.c -=================================================================== ---- uml-2.6.7.orig/arch/um/drivers/mconsole_kern.c 2004-07-16 19:35:52.654263184 +0300 -+++ uml-2.6.7/arch/um/drivers/mconsole_kern.c 2004-07-16 19:47:23.687210208 +0300 -@@ -1,6 +1,6 @@ - /* - * Copyright (C) 2001 Lennert Buytenhek (buytenh@gnu.org) -- * Copyright (C) 2001, 2002 Jeff Dike (jdike@karaya.com) -+ * Copyright (C) 2001 - 2003 Jeff Dike (jdike@addtoit.com) - * Licensed under the GPL - */ - -@@ -15,6 +15,9 @@ - #include "linux/sysrq.h" - #include "linux/workqueue.h" - #include "linux/module.h" -+#include "linux/file.h" -+#include "linux/fs.h" -+#include "linux/namei.h" - #include "linux/proc_fs.h" - #include "asm/irq.h" - #include "asm/uaccess.h" -@@ -27,6 +30,7 @@ - #include "init.h" - #include "os.h" - #include "umid.h" -+#include "irq_kern.h" - - static int do_unlink_socket(struct notifier_block *notifier, - unsigned long what, void *data) -@@ -67,7 +71,7 @@ - - DECLARE_WORK(mconsole_work, mc_work_proc, NULL); - --void mconsole_interrupt(int irq, void *dev_id, struct pt_regs *regs) -+irqreturn_t mconsole_interrupt(int irq, void *dev_id, struct pt_regs *regs) - { - int fd; - struct mconsole_entry *new; -@@ -75,9 +79,10 @@ - - fd = (int) dev_id; - while (mconsole_get_request(fd, &req)){ -- if(req.cmd->as_interrupt) (*req.cmd->handler)(&req); -+ if(req.cmd->context == MCONSOLE_INTR) -+ (*req.cmd->handler)(&req); - else { -- new = kmalloc(sizeof(req), GFP_ATOMIC); -+ new = kmalloc(sizeof(*new), GFP_ATOMIC); - if(new == NULL) - mconsole_reply(&req, "Out of memory", 1, 0); - else { -@@ -88,6 +93,7 @@ - } - if(!list_empty(&mc_requests)) schedule_work(&mconsole_work); - reactivate_fd(fd, MCONSOLE_IRQ); -+ return(IRQ_HANDLED); - } - - void mconsole_version(struct mc_request *req) -@@ -100,20 +106,109 @@ - mconsole_reply(req, version, 0, 0); - } - -+void mconsole_log(struct mc_request *req) 
-+{ -+ int len; -+ char *ptr = req->request.data; -+ -+ ptr += strlen("log "); -+ -+ len = req->len - (ptr - req->request.data); -+ printk("%.*s", len, ptr); -+ mconsole_reply(req, "", 0, 0); -+} -+ -+void mconsole_proc(struct mc_request *req) -+{ -+ struct nameidata nd; -+ struct file_system_type *proc; -+ struct super_block *super; -+ struct file *file; -+ int n, err; -+ char *ptr = req->request.data, *buf; -+ -+ ptr += strlen("proc"); -+ while(isspace(*ptr)) ptr++; -+ -+ proc = get_fs_type("proc"); -+ if(proc == NULL){ -+ mconsole_reply(req, "procfs not registered", 1, 0); -+ goto out; -+ } -+ -+ super = (*proc->get_sb)(proc, 0, NULL, NULL); -+ put_filesystem(proc); -+ if(super == NULL){ -+ mconsole_reply(req, "Failed to get procfs superblock", 1, 0); -+ goto out; -+ } -+ up_write(&super->s_umount); -+ -+ nd.dentry = super->s_root; -+ nd.mnt = NULL; -+ nd.flags = O_RDONLY + 1; -+ nd.last_type = LAST_ROOT; -+ -+ err = link_path_walk(ptr, &nd); -+ if(err){ -+ mconsole_reply(req, "Failed to look up file", 1, 0); -+ goto out_kill; -+ } -+ -+ file = dentry_open(nd.dentry, nd.mnt, O_RDONLY); -+ if(IS_ERR(file)){ -+ mconsole_reply(req, "Failed to open file", 1, 0); -+ goto out_kill; -+ } -+ -+ buf = kmalloc(PAGE_SIZE, GFP_KERNEL); -+ if(buf == NULL){ -+ mconsole_reply(req, "Failed to allocate buffer", 1, 0); -+ goto out_fput; -+ } -+ -+ if((file->f_op != NULL) && (file->f_op->read != NULL)){ -+ do { -+ n = (*file->f_op->read)(file, buf, PAGE_SIZE - 1, -+ &file->f_pos); -+ if(n >= 0){ -+ buf[n] = '\0'; -+ mconsole_reply(req, buf, 0, (n > 0)); -+ } -+ else { -+ mconsole_reply(req, "Read of file failed", -+ 1, 0); -+ goto out_free; -+ } -+ } while(n > 0); -+ } -+ else mconsole_reply(req, "", 0, 0); -+ -+ out_free: -+ kfree(buf); -+ out_fput: -+ fput(file); -+ out_kill: -+ deactivate_super(super); -+ out: ; -+} -+ - #define UML_MCONSOLE_HELPTEXT \ --"Commands: -- version - Get kernel version -- help - Print this message -- halt - Halt UML -- reboot - Reboot UML -- config = 
- Add a new device to UML; -- same syntax as command line -- config - Query the configuration of a device -- remove - Remove a device from UML -- sysrq - Performs the SysRq action controlled by the letter -- cad - invoke the Ctl-Alt-Del handler -- stop - pause the UML; it will do nothing until it receives a 'go' -- go - continue the UML after a 'stop' -+"Commands: \n\ -+ version - Get kernel version \n\ -+ help - Print this message \n\ -+ halt - Halt UML \n\ -+ reboot - Reboot UML \n\ -+ config = - Add a new device to UML; \n\ -+ same syntax as command line \n\ -+ config - Query the configuration of a device \n\ -+ remove - Remove a device from UML \n\ -+ sysrq - Performs the SysRq action controlled by the letter \n\ -+ cad - invoke the Ctl-Alt-Del handler \n\ -+ stop - pause the UML; it will do nothing until it receives a 'go' \n\ -+ go - continue the UML after a 'stop' \n\ -+ log - make UML enter into the kernel log\n\ -+ proc - returns the contents of the UML's /proc/\n\ - " - - void mconsole_help(struct mc_request *req) -@@ -302,7 +397,7 @@ - if(umid_file_name("mconsole", file, sizeof(file))) return(-1); - snprintf(mconsole_socket_name, sizeof(file), "%s", file); - -- sock = create_unix_socket(file, sizeof(file)); -+ sock = os_create_unix_socket(file, sizeof(file), 1); - if (sock < 0){ - printk("Failed to initialize management console\n"); - return(1); -@@ -344,11 +439,16 @@ - if(buf == NULL) - return(-ENOMEM); - -- if(copy_from_user(buf, buffer, count)) -- return(-EFAULT); -+ if(copy_from_user(buf, buffer, count)){ -+ count = -EFAULT; -+ goto out; -+ } -+ - buf[count] = '\0'; - - mconsole_notify(notify_socket, MCONSOLE_USER_NOTIFY, buf, count); -+ out: -+ kfree(buf); - return(count); - } - -Index: uml-2.6.7/arch/um/Makefile-skas -=================================================================== ---- uml-2.6.7.orig/arch/um/Makefile-skas 2004-07-16 19:36:37.092507544 +0300 -+++ uml-2.6.7/arch/um/Makefile-skas 2004-07-16 19:47:23.764198504 +0300 -@@ -14,7 +14,7 
@@ - LINK_SKAS = -Wl,-rpath,/lib - LD_SCRIPT_SKAS = dyn.lds.s - --GEN_HEADERS += $(ARCH_DIR)/kernel/skas/include/skas_ptregs.h -+GEN_HEADERS += $(TOPDIR)/$(ARCH_DIR)/include/skas_ptregs.h - --$(ARCH_DIR)/kernel/skas/include/skas_ptregs.h : -- $(MAKE) -C $(ARCH_DIR)/kernel/skas include/skas_ptregs.h -+$(TOPDIR)/$(ARCH_DIR)/include/skas_ptregs.h : -+ $(Q)$(MAKE) $(build)=$(ARCH_DIR)/kernel/skas $@ -Index: uml-2.6.7/arch/um/kernel/sys_call_table.c -=================================================================== ---- uml-2.6.7.orig/arch/um/kernel/sys_call_table.c 2004-07-16 19:37:20.386925792 +0300 -+++ uml-2.6.7/arch/um/kernel/sys_call_table.c 2004-07-16 19:47:23.739202304 +0300 -@@ -5,7 +5,6 @@ - - #include "linux/config.h" - #include "linux/unistd.h" --#include "linux/version.h" - #include "linux/sys.h" - #include "linux/swap.h" - #include "linux/syscalls.h" -@@ -14,251 +13,50 @@ - #include "sysdep/syscalls.h" - #include "kern_util.h" - --extern syscall_handler_t sys_restart_syscall; --extern syscall_handler_t sys_ni_syscall; --extern syscall_handler_t sys_exit; -+#ifdef CONFIG_NFSD -+#define NFSSERVCTL sys_nfsservctl -+#else -+#define NFSSERVCTL sys_ni_syscall -+#endif -+ -+#define LAST_GENERIC_SYSCALL __NR_vserver -+ -+#if LAST_GENERIC_SYSCALL > LAST_ARCH_SYSCALL -+#define LAST_SYSCALL LAST_GENERIC_SYSCALL -+#else -+#define LAST_SYSCALL LAST_ARCH_SYSCALL -+#endif -+ - extern syscall_handler_t sys_fork; --extern syscall_handler_t sys_creat; --extern syscall_handler_t sys_link; --extern syscall_handler_t sys_unlink; --extern syscall_handler_t sys_chdir; --extern syscall_handler_t sys_mknod; --extern syscall_handler_t sys_chmod; --extern syscall_handler_t sys_lchown16; --extern syscall_handler_t sys_ni_syscall; --extern syscall_handler_t sys_stat; --extern syscall_handler_t sys_getpid; --extern syscall_handler_t sys_oldumount; --extern syscall_handler_t sys_setuid16; --extern syscall_handler_t sys_getuid16; -+extern syscall_handler_t sys_execve; -+extern 
syscall_handler_t um_time; -+extern syscall_handler_t um_mount; -+extern syscall_handler_t um_stime; - extern syscall_handler_t sys_ptrace; --extern syscall_handler_t sys_alarm; --extern syscall_handler_t sys_fstat; --extern syscall_handler_t sys_pause; --extern syscall_handler_t sys_utime; --extern syscall_handler_t sys_ni_syscall; --extern syscall_handler_t sys_ni_syscall; --extern syscall_handler_t sys_access; --extern syscall_handler_t sys_nice; --extern syscall_handler_t sys_ni_syscall; --extern syscall_handler_t sys_sync; --extern syscall_handler_t sys_kill; --extern syscall_handler_t sys_rename; --extern syscall_handler_t sys_mkdir; --extern syscall_handler_t sys_rmdir; - extern syscall_handler_t sys_pipe; --extern syscall_handler_t sys_times; --extern syscall_handler_t sys_ni_syscall; --extern syscall_handler_t sys_brk; --extern syscall_handler_t sys_setgid16; --extern syscall_handler_t sys_getgid16; --extern syscall_handler_t sys_signal; --extern syscall_handler_t sys_geteuid16; --extern syscall_handler_t sys_getegid16; --extern syscall_handler_t sys_acct; --extern syscall_handler_t sys_umount; --extern syscall_handler_t sys_ni_syscall; --extern syscall_handler_t sys_ioctl; --extern syscall_handler_t sys_fcntl; --extern syscall_handler_t sys_ni_syscall; --extern syscall_handler_t sys_setpgid; --extern syscall_handler_t sys_ni_syscall; - extern syscall_handler_t sys_olduname; --extern syscall_handler_t sys_umask; --extern syscall_handler_t sys_chroot; --extern syscall_handler_t sys_ustat; --extern syscall_handler_t sys_dup2; --extern syscall_handler_t sys_getppid; --extern syscall_handler_t sys_getpgrp; - extern syscall_handler_t sys_sigaction; --extern syscall_handler_t sys_sgetmask; --extern syscall_handler_t sys_ssetmask; --extern syscall_handler_t sys_setreuid16; --extern syscall_handler_t sys_setregid16; - extern syscall_handler_t sys_sigsuspend; --extern syscall_handler_t sys_sigpending; --extern syscall_handler_t sys_sethostname; --extern 
syscall_handler_t sys_setrlimit; --extern syscall_handler_t sys_old_getrlimit; --extern syscall_handler_t sys_getrusage; --extern syscall_handler_t sys_gettimeofday; --extern syscall_handler_t sys_settimeofday; --extern syscall_handler_t sys_getgroups16; --extern syscall_handler_t sys_setgroups16; --extern syscall_handler_t sys_symlink; --extern syscall_handler_t sys_lstat; --extern syscall_handler_t sys_readlink; --extern syscall_handler_t sys_swapon; --extern syscall_handler_t sys_uselib; --extern syscall_handler_t sys_reboot; - extern syscall_handler_t old_readdir; --extern syscall_handler_t sys_munmap; --extern syscall_handler_t sys_truncate; --extern syscall_handler_t sys_ftruncate; --extern syscall_handler_t sys_fchmod; --extern syscall_handler_t sys_fchown16; --extern syscall_handler_t sys_getpriority; --extern syscall_handler_t sys_setpriority; --extern syscall_handler_t sys_ni_syscall; --extern syscall_handler_t sys_statfs; --extern syscall_handler_t sys_fstatfs; --extern syscall_handler_t sys_ni_syscall; --extern syscall_handler_t sys_socketcall; --extern syscall_handler_t sys_syslog; --extern syscall_handler_t sys_setitimer; --extern syscall_handler_t sys_getitimer; --extern syscall_handler_t sys_newstat; --extern syscall_handler_t sys_newlstat; --extern syscall_handler_t sys_newfstat; - extern syscall_handler_t sys_uname; --extern syscall_handler_t sys_ni_syscall; --extern syscall_handler_t sys_vhangup; --extern syscall_handler_t sys_ni_syscall; --extern syscall_handler_t sys_ni_syscall; --extern syscall_handler_t sys_swapoff; --extern syscall_handler_t sys_sysinfo; - extern syscall_handler_t sys_ipc; --extern syscall_handler_t sys_fsync; - extern syscall_handler_t sys_sigreturn; --extern syscall_handler_t sys_rt_sigreturn; - extern syscall_handler_t sys_clone; --extern syscall_handler_t sys_setdomainname; --extern syscall_handler_t sys_newuname; --extern syscall_handler_t sys_ni_syscall; --extern syscall_handler_t sys_adjtimex; --extern 
syscall_handler_t sys_mprotect; --extern syscall_handler_t sys_sigprocmask; --extern syscall_handler_t sys_init_module; --extern syscall_handler_t sys_delete_module; --extern syscall_handler_t sys_quotactl; --extern syscall_handler_t sys_getpgid; --extern syscall_handler_t sys_fchdir; --extern syscall_handler_t sys_bdflush; --extern syscall_handler_t sys_sysfs; --extern syscall_handler_t sys_personality; --extern syscall_handler_t sys_ni_syscall; --extern syscall_handler_t sys_setfsuid16; --extern syscall_handler_t sys_setfsgid16; --extern syscall_handler_t sys_llseek; --extern syscall_handler_t sys_getdents; --extern syscall_handler_t sys_flock; --extern syscall_handler_t sys_msync; --extern syscall_handler_t sys_readv; --extern syscall_handler_t sys_writev; --extern syscall_handler_t sys_getsid; --extern syscall_handler_t sys_fdatasync; --extern syscall_handler_t sys_mlock; --extern syscall_handler_t sys_munlock; --extern syscall_handler_t sys_mlockall; --extern syscall_handler_t sys_munlockall; --extern syscall_handler_t sys_sched_setparam; --extern syscall_handler_t sys_sched_getparam; --extern syscall_handler_t sys_sched_setscheduler; --extern syscall_handler_t sys_sched_getscheduler; --extern syscall_handler_t sys_sched_get_priority_max; --extern syscall_handler_t sys_sched_get_priority_min; --extern syscall_handler_t sys_sched_rr_get_interval; --extern syscall_handler_t sys_nanosleep; --extern syscall_handler_t sys_mremap; --extern syscall_handler_t sys_setresuid16; --extern syscall_handler_t sys_getresuid16; --extern syscall_handler_t sys_ni_syscall; --extern syscall_handler_t sys_poll; --extern syscall_handler_t sys_nfsservctl; --extern syscall_handler_t sys_setresgid16; --extern syscall_handler_t sys_getresgid16; --extern syscall_handler_t sys_prctl; --extern syscall_handler_t sys_ni_syscall; -+extern syscall_handler_t sys_rt_sigreturn; - extern syscall_handler_t sys_rt_sigaction; --extern syscall_handler_t sys_rt_sigprocmask; --extern syscall_handler_t 
sys_rt_sigpending; --extern syscall_handler_t sys_rt_sigtimedwait; --extern syscall_handler_t sys_rt_sigqueueinfo; --extern syscall_handler_t sys_rt_sigsuspend; --extern syscall_handler_t sys_pread64; --extern syscall_handler_t sys_pwrite64; --extern syscall_handler_t sys_chown16; --extern syscall_handler_t sys_getcwd; --extern syscall_handler_t sys_capget; --extern syscall_handler_t sys_capset; - extern syscall_handler_t sys_sigaltstack; --extern syscall_handler_t sys_sendfile; --extern syscall_handler_t sys_ni_syscall; --extern syscall_handler_t sys_ni_syscall; - extern syscall_handler_t sys_vfork; --extern syscall_handler_t sys_getrlimit; - extern syscall_handler_t sys_mmap2; --extern syscall_handler_t sys_truncate64; --extern syscall_handler_t sys_ftruncate64; --extern syscall_handler_t sys_stat64; --extern syscall_handler_t sys_lstat64; --extern syscall_handler_t sys_fstat64; --extern syscall_handler_t sys_lchown; --extern syscall_handler_t sys_getuid; --extern syscall_handler_t sys_getgid; --extern syscall_handler_t sys_geteuid; --extern syscall_handler_t sys_getegid; --extern syscall_handler_t sys_setreuid; --extern syscall_handler_t sys_setregid; --extern syscall_handler_t sys_getgroups; --extern syscall_handler_t sys_setgroups; --extern syscall_handler_t sys_fchown; --extern syscall_handler_t sys_setresuid; --extern syscall_handler_t sys_getresuid; --extern syscall_handler_t sys_setresgid; --extern syscall_handler_t sys_getresgid; --extern syscall_handler_t sys_chown; --extern syscall_handler_t sys_setuid; --extern syscall_handler_t sys_setgid; --extern syscall_handler_t sys_setfsuid; --extern syscall_handler_t sys_setfsgid; --extern syscall_handler_t sys_pivot_root; --extern syscall_handler_t sys_mincore; --extern syscall_handler_t sys_madvise; --extern syscall_handler_t sys_fcntl64; --extern syscall_handler_t sys_getdents64; --extern syscall_handler_t sys_gettid; --extern syscall_handler_t sys_readahead; --extern syscall_handler_t sys_tkill; --extern 
syscall_handler_t sys_sendfile64; --extern syscall_handler_t sys_futex; --extern syscall_handler_t sys_sched_setaffinity; --extern syscall_handler_t sys_sched_getaffinity; --extern syscall_handler_t sys_io_setup; --extern syscall_handler_t sys_io_destroy; --extern syscall_handler_t sys_io_getevents; --extern syscall_handler_t sys_io_submit; --extern syscall_handler_t sys_io_cancel; --extern syscall_handler_t sys_exit_group; --extern syscall_handler_t sys_lookup_dcookie; --extern syscall_handler_t sys_epoll_create; --extern syscall_handler_t sys_epoll_ctl; --extern syscall_handler_t sys_epoll_wait; --extern syscall_handler_t sys_remap_file_pages; --extern syscall_handler_t sys_set_tid_address; -- --#ifdef CONFIG_NFSD --#define NFSSERVCTL sys_nfsservctl --#else --#define NFSSERVCTL sys_ni_syscall --#endif -- --extern syscall_handler_t um_mount; --extern syscall_handler_t um_time; --extern syscall_handler_t um_stime; -- --#define LAST_GENERIC_SYSCALL __NR_set_tid_address -- --#if LAST_GENERIC_SYSCALL > LAST_ARCH_SYSCALL --#define LAST_SYSCALL LAST_GENERIC_SYSCALL --#else --#define LAST_SYSCALL LAST_ARCH_SYSCALL --#endif -+extern syscall_handler_t sys_timer_create; -+extern syscall_handler_t old_mmap_i386; -+extern syscall_handler_t old_select; -+extern syscall_handler_t sys_modify_ldt; -+extern syscall_handler_t sys_rt_sigsuspend; - - syscall_handler_t *sys_call_table[] = { -- [ __NR_restart_syscall ] = sys_restart_syscall, -- [ __NR_exit ] = sys_exit, -- [ __NR_fork ] = sys_fork, -+ [ __NR_restart_syscall ] = (syscall_handler_t *) sys_restart_syscall, -+ [ __NR_exit ] (syscall_handler_t *) sys_exit, -+ [ __NR_fork ] (syscall_handler_t *) sys_fork, - [ __NR_read ] = (syscall_handler_t *) sys_read, - [ __NR_write ] = (syscall_handler_t *) sys_write, - -@@ -266,229 +64,249 @@ - [ __NR_open ] = (syscall_handler_t *) sys_open, - [ __NR_close ] = (syscall_handler_t *) sys_close, - [ __NR_waitpid ] = (syscall_handler_t *) sys_waitpid, -- [ __NR_creat ] = sys_creat, -- [ 
__NR_link ] = sys_link, -- [ __NR_unlink ] = sys_unlink, -+ [ __NR_creat ] (syscall_handler_t *) sys_creat, -+ [ __NR_link ] (syscall_handler_t *) sys_link, -+ [ __NR_unlink ] (syscall_handler_t *) sys_unlink, - [ __NR_execve ] = (syscall_handler_t *) sys_execve, - - /* declared differently in kern_util.h */ -- [ __NR_chdir ] = sys_chdir, -+ [ __NR_chdir ] (syscall_handler_t *) sys_chdir, - [ __NR_time ] = um_time, -- [ __NR_mknod ] = sys_mknod, -- [ __NR_chmod ] = sys_chmod, -- [ __NR_lchown ] = sys_lchown16, -- [ __NR_break ] = sys_ni_syscall, -- [ __NR_oldstat ] = sys_stat, -+ [ __NR_mknod ] (syscall_handler_t *) sys_mknod, -+ [ __NR_chmod ] (syscall_handler_t *) sys_chmod, -+ [ __NR_lchown ] (syscall_handler_t *) sys_lchown16, -+ [ __NR_break ] (syscall_handler_t *) sys_ni_syscall, -+ [ __NR_oldstat ] (syscall_handler_t *) sys_stat, - [ __NR_lseek ] = (syscall_handler_t *) sys_lseek, -- [ __NR_getpid ] = sys_getpid, -+ [ __NR_getpid ] (syscall_handler_t *) sys_getpid, - [ __NR_mount ] = um_mount, -- [ __NR_umount ] = sys_oldumount, -- [ __NR_setuid ] = sys_setuid16, -- [ __NR_getuid ] = sys_getuid16, -+ [ __NR_umount ] (syscall_handler_t *) sys_oldumount, -+ [ __NR_setuid ] (syscall_handler_t *) sys_setuid16, -+ [ __NR_getuid ] (syscall_handler_t *) sys_getuid16, - [ __NR_stime ] = um_stime, -- [ __NR_ptrace ] = sys_ptrace, -- [ __NR_alarm ] = sys_alarm, -- [ __NR_oldfstat ] = sys_fstat, -- [ __NR_pause ] = sys_pause, -- [ __NR_utime ] = sys_utime, -- [ __NR_stty ] = sys_ni_syscall, -- [ __NR_gtty ] = sys_ni_syscall, -- [ __NR_access ] = sys_access, -- [ __NR_nice ] = sys_nice, -- [ __NR_ftime ] = sys_ni_syscall, -- [ __NR_sync ] = sys_sync, -- [ __NR_kill ] = sys_kill, -- [ __NR_rename ] = sys_rename, -- [ __NR_mkdir ] = sys_mkdir, -- [ __NR_rmdir ] = sys_rmdir, -+ [ __NR_ptrace ] (syscall_handler_t *) sys_ptrace, -+ [ __NR_alarm ] (syscall_handler_t *) sys_alarm, -+ [ __NR_oldfstat ] (syscall_handler_t *) sys_fstat, -+ [ __NR_pause ] (syscall_handler_t *) 
sys_pause, -+ [ __NR_utime ] (syscall_handler_t *) sys_utime, -+ [ __NR_stty ] (syscall_handler_t *) sys_ni_syscall, -+ [ __NR_gtty ] (syscall_handler_t *) sys_ni_syscall, -+ [ __NR_access ] (syscall_handler_t *) sys_access, -+ [ __NR_nice ] (syscall_handler_t *) sys_nice, -+ [ __NR_ftime ] (syscall_handler_t *) sys_ni_syscall, -+ [ __NR_sync ] (syscall_handler_t *) sys_sync, -+ [ __NR_kill ] (syscall_handler_t *) sys_kill, -+ [ __NR_rename ] (syscall_handler_t *) sys_rename, -+ [ __NR_mkdir ] (syscall_handler_t *) sys_mkdir, -+ [ __NR_rmdir ] (syscall_handler_t *) sys_rmdir, - - /* Declared differently in asm/unistd.h */ - [ __NR_dup ] = (syscall_handler_t *) sys_dup, -- [ __NR_pipe ] = sys_pipe, -- [ __NR_times ] = sys_times, -- [ __NR_prof ] = sys_ni_syscall, -- [ __NR_brk ] = sys_brk, -- [ __NR_setgid ] = sys_setgid16, -- [ __NR_getgid ] = sys_getgid16, -- [ __NR_signal ] = sys_signal, -- [ __NR_geteuid ] = sys_geteuid16, -- [ __NR_getegid ] = sys_getegid16, -- [ __NR_acct ] = sys_acct, -- [ __NR_umount2 ] = sys_umount, -- [ __NR_lock ] = sys_ni_syscall, -- [ __NR_ioctl ] = sys_ioctl, -- [ __NR_fcntl ] = sys_fcntl, -- [ __NR_mpx ] = sys_ni_syscall, -- [ __NR_setpgid ] = sys_setpgid, -- [ __NR_ulimit ] = sys_ni_syscall, -- [ __NR_oldolduname ] = sys_olduname, -- [ __NR_umask ] = sys_umask, -- [ __NR_chroot ] = sys_chroot, -- [ __NR_ustat ] = sys_ustat, -- [ __NR_dup2 ] = sys_dup2, -- [ __NR_getppid ] = sys_getppid, -- [ __NR_getpgrp ] = sys_getpgrp, -+ [ __NR_pipe ] (syscall_handler_t *) sys_pipe, -+ [ __NR_times ] (syscall_handler_t *) sys_times, -+ [ __NR_prof ] (syscall_handler_t *) sys_ni_syscall, -+ [ __NR_brk ] (syscall_handler_t *) sys_brk, -+ [ __NR_setgid ] (syscall_handler_t *) sys_setgid16, -+ [ __NR_getgid ] (syscall_handler_t *) sys_getgid16, -+ [ __NR_signal ] (syscall_handler_t *) sys_signal, -+ [ __NR_geteuid ] (syscall_handler_t *) sys_geteuid16, -+ [ __NR_getegid ] (syscall_handler_t *) sys_getegid16, -+ [ __NR_acct ] (syscall_handler_t *) 
sys_acct, -+ [ __NR_umount2 ] (syscall_handler_t *) sys_umount, -+ [ __NR_lock ] (syscall_handler_t *) sys_ni_syscall, -+ [ __NR_ioctl ] (syscall_handler_t *) sys_ioctl, -+ [ __NR_fcntl ] (syscall_handler_t *) sys_fcntl, -+ [ __NR_mpx ] (syscall_handler_t *) sys_ni_syscall, -+ [ __NR_setpgid ] (syscall_handler_t *) sys_setpgid, -+ [ __NR_ulimit ] (syscall_handler_t *) sys_ni_syscall, -+ [ __NR_oldolduname ] (syscall_handler_t *) sys_olduname, -+ [ __NR_umask ] (syscall_handler_t *) sys_umask, -+ [ __NR_chroot ] (syscall_handler_t *) sys_chroot, -+ [ __NR_ustat ] (syscall_handler_t *) sys_ustat, -+ [ __NR_dup2 ] (syscall_handler_t *) sys_dup2, -+ [ __NR_getppid ] (syscall_handler_t *) sys_getppid, -+ [ __NR_getpgrp ] (syscall_handler_t *) sys_getpgrp, - [ __NR_setsid ] = (syscall_handler_t *) sys_setsid, -- [ __NR_sigaction ] = sys_sigaction, -- [ __NR_sgetmask ] = sys_sgetmask, -- [ __NR_ssetmask ] = sys_ssetmask, -- [ __NR_setreuid ] = sys_setreuid16, -- [ __NR_setregid ] = sys_setregid16, -- [ __NR_sigsuspend ] = sys_sigsuspend, -- [ __NR_sigpending ] = sys_sigpending, -- [ __NR_sethostname ] = sys_sethostname, -- [ __NR_setrlimit ] = sys_setrlimit, -- [ __NR_getrlimit ] = sys_old_getrlimit, -- [ __NR_getrusage ] = sys_getrusage, -- [ __NR_gettimeofday ] = sys_gettimeofday, -- [ __NR_settimeofday ] = sys_settimeofday, -- [ __NR_getgroups ] = sys_getgroups16, -- [ __NR_setgroups ] = sys_setgroups16, -- [ __NR_symlink ] = sys_symlink, -- [ __NR_oldlstat ] = sys_lstat, -- [ __NR_readlink ] = sys_readlink, -- [ __NR_uselib ] = sys_uselib, -+ [ __NR_sigaction ] (syscall_handler_t *) sys_sigaction, -+ [ __NR_sgetmask ] (syscall_handler_t *) sys_sgetmask, -+ [ __NR_ssetmask ] (syscall_handler_t *) sys_ssetmask, -+ [ __NR_setreuid ] (syscall_handler_t *) sys_setreuid16, -+ [ __NR_setregid ] (syscall_handler_t *) sys_setregid16, -+ [ __NR_sigsuspend ] (syscall_handler_t *) sys_sigsuspend, -+ [ __NR_sigpending ] (syscall_handler_t *) sys_sigpending, -+ [ __NR_sethostname ] 
(syscall_handler_t *) sys_sethostname, -+ [ __NR_setrlimit ] (syscall_handler_t *) sys_setrlimit, -+ [ __NR_getrlimit ] (syscall_handler_t *) sys_old_getrlimit, -+ [ __NR_getrusage ] (syscall_handler_t *) sys_getrusage, -+ [ __NR_gettimeofday ] (syscall_handler_t *) sys_gettimeofday, -+ [ __NR_settimeofday ] (syscall_handler_t *) sys_settimeofday, -+ [ __NR_getgroups ] (syscall_handler_t *) sys_getgroups16, -+ [ __NR_setgroups ] (syscall_handler_t *) sys_setgroups16, -+ [ __NR_symlink ] (syscall_handler_t *) sys_symlink, -+ [ __NR_oldlstat ] (syscall_handler_t *) sys_lstat, -+ [ __NR_readlink ] (syscall_handler_t *) sys_readlink, -+ [ __NR_uselib ] (syscall_handler_t *) sys_uselib, - [ __NR_swapon ] = (syscall_handler_t *) sys_swapon, -- [ __NR_reboot ] = sys_reboot, -+ [ __NR_reboot ] (syscall_handler_t *) sys_reboot, - [ __NR_readdir ] = old_readdir, -- [ __NR_munmap ] = sys_munmap, -- [ __NR_truncate ] = sys_truncate, -- [ __NR_ftruncate ] = sys_ftruncate, -- [ __NR_fchmod ] = sys_fchmod, -- [ __NR_fchown ] = sys_fchown16, -- [ __NR_getpriority ] = sys_getpriority, -- [ __NR_setpriority ] = sys_setpriority, -- [ __NR_profil ] = sys_ni_syscall, -- [ __NR_statfs ] = sys_statfs, -- [ __NR_fstatfs ] = sys_fstatfs, -- [ __NR_ioperm ] = sys_ni_syscall, -- [ __NR_socketcall ] = sys_socketcall, -- [ __NR_syslog ] = sys_syslog, -- [ __NR_setitimer ] = sys_setitimer, -- [ __NR_getitimer ] = sys_getitimer, -- [ __NR_stat ] = sys_newstat, -- [ __NR_lstat ] = sys_newlstat, -- [ __NR_fstat ] = sys_newfstat, -- [ __NR_olduname ] = sys_uname, -- [ __NR_iopl ] = sys_ni_syscall, -- [ __NR_vhangup ] = sys_vhangup, -- [ __NR_idle ] = sys_ni_syscall, -+ [ __NR_munmap ] (syscall_handler_t *) sys_munmap, -+ [ __NR_truncate ] (syscall_handler_t *) sys_truncate, -+ [ __NR_ftruncate ] (syscall_handler_t *) sys_ftruncate, -+ [ __NR_fchmod ] (syscall_handler_t *) sys_fchmod, -+ [ __NR_fchown ] (syscall_handler_t *) sys_fchown16, -+ [ __NR_getpriority ] (syscall_handler_t *) 
sys_getpriority, -+ [ __NR_setpriority ] (syscall_handler_t *) sys_setpriority, -+ [ __NR_profil ] (syscall_handler_t *) sys_ni_syscall, -+ [ __NR_statfs ] (syscall_handler_t *) sys_statfs, -+ [ __NR_fstatfs ] (syscall_handler_t *) sys_fstatfs, -+ [ __NR_ioperm ] (syscall_handler_t *) sys_ni_syscall, -+ [ __NR_socketcall ] (syscall_handler_t *) sys_socketcall, -+ [ __NR_syslog ] (syscall_handler_t *) sys_syslog, -+ [ __NR_setitimer ] (syscall_handler_t *) sys_setitimer, -+ [ __NR_getitimer ] (syscall_handler_t *) sys_getitimer, -+ [ __NR_stat ] (syscall_handler_t *) sys_newstat, -+ [ __NR_lstat ] (syscall_handler_t *) sys_newlstat, -+ [ __NR_fstat ] (syscall_handler_t *) sys_newfstat, -+ [ __NR_olduname ] (syscall_handler_t *) sys_uname, -+ [ __NR_iopl ] (syscall_handler_t *) sys_ni_syscall, -+ [ __NR_vhangup ] (syscall_handler_t *) sys_vhangup, -+ [ __NR_idle ] (syscall_handler_t *) sys_ni_syscall, - [ __NR_wait4 ] = (syscall_handler_t *) sys_wait4, - [ __NR_swapoff ] = (syscall_handler_t *) sys_swapoff, -- [ __NR_sysinfo ] = sys_sysinfo, -- [ __NR_ipc ] = sys_ipc, -- [ __NR_fsync ] = sys_fsync, -- [ __NR_sigreturn ] = sys_sigreturn, -- [ __NR_clone ] = sys_clone, -- [ __NR_setdomainname ] = sys_setdomainname, -- [ __NR_uname ] = sys_newuname, -- [ __NR_adjtimex ] = sys_adjtimex, -- [ __NR_mprotect ] = sys_mprotect, -- [ __NR_sigprocmask ] = sys_sigprocmask, -- [ __NR_create_module ] = sys_ni_syscall, -- [ __NR_init_module ] = sys_init_module, -- [ __NR_delete_module ] = sys_delete_module, -- [ __NR_get_kernel_syms ] = sys_ni_syscall, -- [ __NR_quotactl ] = sys_quotactl, -- [ __NR_getpgid ] = sys_getpgid, -- [ __NR_fchdir ] = sys_fchdir, -- [ __NR_bdflush ] = sys_bdflush, -- [ __NR_sysfs ] = sys_sysfs, -- [ __NR_personality ] = sys_personality, -- [ __NR_afs_syscall ] = sys_ni_syscall, -- [ __NR_setfsuid ] = sys_setfsuid16, -- [ __NR_setfsgid ] = sys_setfsgid16, -- [ __NR__llseek ] = sys_llseek, -- [ __NR_getdents ] = sys_getdents, -+ [ __NR_sysinfo ] 
(syscall_handler_t *) sys_sysinfo, -+ [ __NR_ipc ] (syscall_handler_t *) sys_ipc, -+ [ __NR_fsync ] (syscall_handler_t *) sys_fsync, -+ [ __NR_sigreturn ] (syscall_handler_t *) sys_sigreturn, -+ [ __NR_clone ] (syscall_handler_t *) sys_clone, -+ [ __NR_setdomainname ] (syscall_handler_t *) sys_setdomainname, -+ [ __NR_uname ] (syscall_handler_t *) sys_newuname, -+ [ __NR_adjtimex ] (syscall_handler_t *) sys_adjtimex, -+ [ __NR_mprotect ] (syscall_handler_t *) sys_mprotect, -+ [ __NR_sigprocmask ] (syscall_handler_t *) sys_sigprocmask, -+ [ __NR_create_module ] (syscall_handler_t *) sys_ni_syscall, -+ [ __NR_init_module ] (syscall_handler_t *) sys_init_module, -+ [ __NR_delete_module ] (syscall_handler_t *) sys_delete_module, -+ [ __NR_get_kernel_syms ] (syscall_handler_t *) sys_ni_syscall, -+ [ __NR_quotactl ] (syscall_handler_t *) sys_quotactl, -+ [ __NR_getpgid ] (syscall_handler_t *) sys_getpgid, -+ [ __NR_fchdir ] (syscall_handler_t *) sys_fchdir, -+ [ __NR_bdflush ] (syscall_handler_t *) sys_bdflush, -+ [ __NR_sysfs ] (syscall_handler_t *) sys_sysfs, -+ [ __NR_personality ] (syscall_handler_t *) sys_personality, -+ [ __NR_afs_syscall ] (syscall_handler_t *) sys_ni_syscall, -+ [ __NR_setfsuid ] (syscall_handler_t *) sys_setfsuid16, -+ [ __NR_setfsgid ] (syscall_handler_t *) sys_setfsgid16, -+ [ __NR__llseek ] (syscall_handler_t *) sys_llseek, -+ [ __NR_getdents ] (syscall_handler_t *) sys_getdents, - [ __NR__newselect ] = (syscall_handler_t *) sys_select, -- [ __NR_flock ] = sys_flock, -- [ __NR_msync ] = sys_msync, -- [ __NR_readv ] = sys_readv, -- [ __NR_writev ] = sys_writev, -- [ __NR_getsid ] = sys_getsid, -- [ __NR_fdatasync ] = sys_fdatasync, -+ [ __NR_flock ] (syscall_handler_t *) sys_flock, -+ [ __NR_msync ] (syscall_handler_t *) sys_msync, -+ [ __NR_readv ] (syscall_handler_t *) sys_readv, -+ [ __NR_writev ] (syscall_handler_t *) sys_writev, -+ [ __NR_getsid ] (syscall_handler_t *) sys_getsid, -+ [ __NR_fdatasync ] (syscall_handler_t *) sys_fdatasync, 
- [ __NR__sysctl ] = (syscall_handler_t *) sys_sysctl, -- [ __NR_mlock ] = sys_mlock, -- [ __NR_munlock ] = sys_munlock, -- [ __NR_mlockall ] = sys_mlockall, -- [ __NR_munlockall ] = sys_munlockall, -- [ __NR_sched_setparam ] = sys_sched_setparam, -- [ __NR_sched_getparam ] = sys_sched_getparam, -- [ __NR_sched_setscheduler ] = sys_sched_setscheduler, -- [ __NR_sched_getscheduler ] = sys_sched_getscheduler, -+ [ __NR_mlock ] (syscall_handler_t *) sys_mlock, -+ [ __NR_munlock ] (syscall_handler_t *) sys_munlock, -+ [ __NR_mlockall ] (syscall_handler_t *) sys_mlockall, -+ [ __NR_munlockall ] (syscall_handler_t *) sys_munlockall, -+ [ __NR_sched_setparam ] (syscall_handler_t *) sys_sched_setparam, -+ [ __NR_sched_getparam ] (syscall_handler_t *) sys_sched_getparam, -+ [ __NR_sched_setscheduler ] (syscall_handler_t *) sys_sched_setscheduler, -+ [ __NR_sched_getscheduler ] (syscall_handler_t *) sys_sched_getscheduler, - [ __NR_sched_yield ] = (syscall_handler_t *) yield, -- [ __NR_sched_get_priority_max ] = sys_sched_get_priority_max, -- [ __NR_sched_get_priority_min ] = sys_sched_get_priority_min, -- [ __NR_sched_rr_get_interval ] = sys_sched_rr_get_interval, -- [ __NR_nanosleep ] = sys_nanosleep, -- [ __NR_mremap ] = sys_mremap, -- [ __NR_setresuid ] = sys_setresuid16, -- [ __NR_getresuid ] = sys_getresuid16, -- [ __NR_vm86 ] = sys_ni_syscall, -- [ __NR_query_module ] = sys_ni_syscall, -- [ __NR_poll ] = sys_poll, -- [ __NR_nfsservctl ] = NFSSERVCTL, -- [ __NR_setresgid ] = sys_setresgid16, -- [ __NR_getresgid ] = sys_getresgid16, -- [ __NR_prctl ] = sys_prctl, -- [ __NR_rt_sigreturn ] = sys_rt_sigreturn, -- [ __NR_rt_sigaction ] = sys_rt_sigaction, -- [ __NR_rt_sigprocmask ] = sys_rt_sigprocmask, -- [ __NR_rt_sigpending ] = sys_rt_sigpending, -- [ __NR_rt_sigtimedwait ] = sys_rt_sigtimedwait, -- [ __NR_rt_sigqueueinfo ] = sys_rt_sigqueueinfo, -- [ __NR_rt_sigsuspend ] = sys_rt_sigsuspend, -- [ __NR_pread64 ] = sys_pread64, -- [ __NR_pwrite64 ] = sys_pwrite64, -- [ 
__NR_chown ] = sys_chown16, -- [ __NR_getcwd ] = sys_getcwd, -- [ __NR_capget ] = sys_capget, -- [ __NR_capset ] = sys_capset, -- [ __NR_sigaltstack ] = sys_sigaltstack, -- [ __NR_sendfile ] = sys_sendfile, -- [ __NR_getpmsg ] = sys_ni_syscall, -- [ __NR_putpmsg ] = sys_ni_syscall, -- [ __NR_vfork ] = sys_vfork, -- [ __NR_ugetrlimit ] = sys_getrlimit, -- [ __NR_mmap2 ] = sys_mmap2, -- [ __NR_truncate64 ] = sys_truncate64, -- [ __NR_ftruncate64 ] = sys_ftruncate64, -- [ __NR_stat64 ] = sys_stat64, -- [ __NR_lstat64 ] = sys_lstat64, -- [ __NR_fstat64 ] = sys_fstat64, -- [ __NR_fcntl64 ] = sys_fcntl64, -- [ __NR_getdents64 ] = sys_getdents64, -- [ __NR_gettid ] = sys_gettid, -- [ __NR_readahead ] = sys_readahead, -- [ __NR_setxattr ] = sys_ni_syscall, -- [ __NR_lsetxattr ] = sys_ni_syscall, -- [ __NR_fsetxattr ] = sys_ni_syscall, -- [ __NR_getxattr ] = sys_ni_syscall, -- [ __NR_lgetxattr ] = sys_ni_syscall, -- [ __NR_fgetxattr ] = sys_ni_syscall, -- [ __NR_listxattr ] = sys_ni_syscall, -- [ __NR_llistxattr ] = sys_ni_syscall, -- [ __NR_flistxattr ] = sys_ni_syscall, -- [ __NR_removexattr ] = sys_ni_syscall, -- [ __NR_lremovexattr ] = sys_ni_syscall, -- [ __NR_fremovexattr ] = sys_ni_syscall, -- [ __NR_tkill ] = sys_tkill, -- [ __NR_sendfile64 ] = sys_sendfile64, -- [ __NR_futex ] = sys_futex, -- [ __NR_sched_setaffinity ] = sys_sched_setaffinity, -- [ __NR_sched_getaffinity ] = sys_sched_getaffinity, -- [ __NR_io_setup ] = sys_io_setup, -- [ __NR_io_destroy ] = sys_io_destroy, -- [ __NR_io_getevents ] = sys_io_getevents, -- [ __NR_io_submit ] = sys_io_submit, -- [ __NR_io_cancel ] = sys_io_cancel, -- [ __NR_exit_group ] = sys_exit_group, -- [ __NR_lookup_dcookie ] = sys_lookup_dcookie, -- [ __NR_epoll_create ] = sys_epoll_create, -- [ __NR_epoll_ctl ] = sys_epoll_ctl, -- [ __NR_epoll_wait ] = sys_epoll_wait, -- [ __NR_remap_file_pages ] = sys_remap_file_pages, -- [ __NR_set_tid_address ] = sys_set_tid_address, -+ [ __NR_sched_get_priority_max ] (syscall_handler_t *) 
sys_sched_get_priority_max, -+ [ __NR_sched_get_priority_min ] (syscall_handler_t *) sys_sched_get_priority_min, -+ [ __NR_sched_rr_get_interval ] (syscall_handler_t *) sys_sched_rr_get_interval, -+ [ __NR_nanosleep ] (syscall_handler_t *) sys_nanosleep, -+ [ __NR_mremap ] (syscall_handler_t *) sys_mremap, -+ [ __NR_setresuid ] (syscall_handler_t *) sys_setresuid16, -+ [ __NR_getresuid ] (syscall_handler_t *) sys_getresuid16, -+ [ __NR_vm86 ] (syscall_handler_t *) sys_ni_syscall, -+ [ __NR_query_module ] (syscall_handler_t *) sys_ni_syscall, -+ [ __NR_poll ] (syscall_handler_t *) sys_poll, -+ [ __NR_nfsservctl ] = (syscall_handler_t *) NFSSERVCTL, -+ [ __NR_setresgid ] (syscall_handler_t *) sys_setresgid16, -+ [ __NR_getresgid ] (syscall_handler_t *) sys_getresgid16, -+ [ __NR_prctl ] (syscall_handler_t *) sys_prctl, -+ [ __NR_rt_sigreturn ] (syscall_handler_t *) sys_rt_sigreturn, -+ [ __NR_rt_sigaction ] (syscall_handler_t *) sys_rt_sigaction, -+ [ __NR_rt_sigprocmask ] (syscall_handler_t *) sys_rt_sigprocmask, -+ [ __NR_rt_sigpending ] (syscall_handler_t *) sys_rt_sigpending, -+ [ __NR_rt_sigtimedwait ] (syscall_handler_t *) sys_rt_sigtimedwait, -+ [ __NR_rt_sigqueueinfo ] (syscall_handler_t *) sys_rt_sigqueueinfo, -+ [ __NR_rt_sigsuspend ] (syscall_handler_t *) sys_rt_sigsuspend, -+ [ __NR_pread64 ] (syscall_handler_t *) sys_pread64, -+ [ __NR_pwrite64 ] (syscall_handler_t *) sys_pwrite64, -+ [ __NR_chown ] (syscall_handler_t *) sys_chown16, -+ [ __NR_getcwd ] (syscall_handler_t *) sys_getcwd, -+ [ __NR_capget ] (syscall_handler_t *) sys_capget, -+ [ __NR_capset ] (syscall_handler_t *) sys_capset, -+ [ __NR_sigaltstack ] (syscall_handler_t *) sys_sigaltstack, -+ [ __NR_sendfile ] (syscall_handler_t *) sys_sendfile, -+ [ __NR_getpmsg ] (syscall_handler_t *) sys_ni_syscall, -+ [ __NR_putpmsg ] (syscall_handler_t *) sys_ni_syscall, -+ [ __NR_vfork ] (syscall_handler_t *) sys_vfork, -+ [ __NR_ugetrlimit ] (syscall_handler_t *) sys_getrlimit, -+ [ __NR_mmap2 ] 
(syscall_handler_t *) sys_mmap2, -+ [ __NR_truncate64 ] (syscall_handler_t *) sys_truncate64, -+ [ __NR_ftruncate64 ] (syscall_handler_t *) sys_ftruncate64, -+ [ __NR_stat64 ] (syscall_handler_t *) sys_stat64, -+ [ __NR_lstat64 ] (syscall_handler_t *) sys_lstat64, -+ [ __NR_fstat64 ] (syscall_handler_t *) sys_fstat64, -+ [ __NR_getdents64 ] (syscall_handler_t *) sys_getdents64, -+ [ __NR_fcntl64 ] (syscall_handler_t *) sys_fcntl64, -+ [ 223 ] (syscall_handler_t *) sys_ni_syscall, -+ [ __NR_gettid ] (syscall_handler_t *) sys_gettid, -+ [ __NR_readahead ] (syscall_handler_t *) sys_readahead, -+ [ __NR_setxattr ] (syscall_handler_t *) sys_setxattr, -+ [ __NR_lsetxattr ] (syscall_handler_t *) sys_lsetxattr, -+ [ __NR_fsetxattr ] (syscall_handler_t *) sys_fsetxattr, -+ [ __NR_getxattr ] (syscall_handler_t *) sys_getxattr, -+ [ __NR_lgetxattr ] (syscall_handler_t *) sys_lgetxattr, -+ [ __NR_fgetxattr ] (syscall_handler_t *) sys_fgetxattr, -+ [ __NR_listxattr ] (syscall_handler_t *) sys_listxattr, -+ [ __NR_llistxattr ] (syscall_handler_t *) sys_llistxattr, -+ [ __NR_flistxattr ] (syscall_handler_t *) sys_flistxattr, -+ [ __NR_removexattr ] (syscall_handler_t *) sys_removexattr, -+ [ __NR_lremovexattr ] (syscall_handler_t *) sys_lremovexattr, -+ [ __NR_fremovexattr ] (syscall_handler_t *) sys_fremovexattr, -+ [ __NR_tkill ] (syscall_handler_t *) sys_tkill, -+ [ __NR_sendfile64 ] (syscall_handler_t *) sys_sendfile64, -+ [ __NR_futex ] (syscall_handler_t *) sys_futex, -+ [ __NR_sched_setaffinity ] (syscall_handler_t *) sys_sched_setaffinity, -+ [ __NR_sched_getaffinity ] (syscall_handler_t *) sys_sched_getaffinity, -+ [ __NR_set_thread_area ] (syscall_handler_t *) sys_ni_syscall, -+ [ __NR_get_thread_area ] (syscall_handler_t *) sys_ni_syscall, -+ [ __NR_io_setup ] (syscall_handler_t *) sys_io_setup, -+ [ __NR_io_destroy ] (syscall_handler_t *) sys_io_destroy, -+ [ __NR_io_getevents ] (syscall_handler_t *) sys_io_getevents, -+ [ __NR_io_submit ] (syscall_handler_t *) 
sys_io_submit, -+ [ __NR_io_cancel ] (syscall_handler_t *) sys_io_cancel, -+ [ __NR_fadvise64 ] (syscall_handler_t *) sys_fadvise64, -+ [ 251 ] (syscall_handler_t *) sys_ni_syscall, -+ [ __NR_exit_group ] (syscall_handler_t *) sys_exit_group, -+ [ __NR_lookup_dcookie ] (syscall_handler_t *) sys_lookup_dcookie, -+ [ __NR_epoll_create ] (syscall_handler_t *) sys_epoll_create, -+ [ __NR_epoll_ctl ] (syscall_handler_t *) sys_epoll_ctl, -+ [ __NR_epoll_wait ] (syscall_handler_t *) sys_epoll_wait, -+ [ __NR_remap_file_pages ] (syscall_handler_t *) sys_remap_file_pages, -+ [ __NR_set_tid_address ] (syscall_handler_t *) sys_set_tid_address, -+ [ __NR_timer_create ] (syscall_handler_t *) sys_timer_create, -+ [ __NR_timer_settime ] (syscall_handler_t *) sys_timer_settime, -+ [ __NR_timer_gettime ] (syscall_handler_t *) sys_timer_gettime, -+ [ __NR_timer_getoverrun ] (syscall_handler_t *) sys_timer_getoverrun, -+ [ __NR_timer_delete ] (syscall_handler_t *) sys_timer_delete, -+ [ __NR_clock_settime ] (syscall_handler_t *) sys_clock_settime, -+ [ __NR_clock_gettime ] (syscall_handler_t *) sys_clock_gettime, -+ [ __NR_clock_getres ] (syscall_handler_t *) sys_clock_getres, -+ [ __NR_clock_nanosleep ] (syscall_handler_t *) sys_clock_nanosleep, -+ [ __NR_statfs64 ] (syscall_handler_t *) sys_statfs64, -+ [ __NR_fstatfs64 ] (syscall_handler_t *) sys_fstatfs64, -+ [ __NR_tgkill ] (syscall_handler_t *) sys_tgkill, -+ [ __NR_utimes ] (syscall_handler_t *) sys_utimes, -+ [ __NR_fadvise64_64 ] (syscall_handler_t *) sys_fadvise64_64, -+ [ __NR_vserver ] (syscall_handler_t *) sys_ni_syscall, - - ARCH_SYSCALLS - [ LAST_SYSCALL + 1 ... 
NR_syscalls ] = -Index: uml-2.6.7/arch/um/kernel/exec_kern.c -=================================================================== ---- uml-2.6.7.orig/arch/um/kernel/exec_kern.c 2004-07-16 19:36:16.769597096 +0300 -+++ uml-2.6.7/arch/um/kernel/exec_kern.c 2004-07-16 19:47:23.713206256 +0300 -@@ -32,10 +32,15 @@ - CHOOSE_MODE_PROC(start_thread_tt, start_thread_skas, regs, eip, esp); - } - -+extern void log_exec(char **argv, void *tty); -+ - static int execve1(char *file, char **argv, char **env) - { - int error; - -+#ifdef CONFIG_TTY_LOG -+ log_exec(argv, current->tty); -+#endif - error = do_execve(file, argv, env, ¤t->thread.regs); - if (error == 0){ - current->ptrace &= ~PT_DTRACE; -Index: uml-2.6.7/arch/um/drivers/daemon_user.c -=================================================================== ---- uml-2.6.7.orig/arch/um/drivers/daemon_user.c 2004-07-16 19:36:30.112568656 +0300 -+++ uml-2.6.7/arch/um/drivers/daemon_user.c 2004-07-16 19:47:23.681211120 +0300 -@@ -53,7 +53,8 @@ - struct request_v3 req; - int fd, n, err; - -- if((pri->control = socket(AF_UNIX, SOCK_STREAM, 0)) < 0){ -+ pri->control = socket(AF_UNIX, SOCK_STREAM, 0); -+ if(pri->control < 0){ - printk("daemon_open : control socket failed, errno = %d\n", - errno); - return(-errno); -@@ -67,7 +68,8 @@ - goto out; - } - -- if((fd = socket(AF_UNIX, SOCK_DGRAM, 0)) < 0){ -+ fd = socket(AF_UNIX, SOCK_DGRAM, 0); -+ if(fd < 0){ - printk("daemon_open : data socket failed, errno = %d\n", - errno); - err = -errno; -@@ -91,18 +93,18 @@ - req.version = SWITCH_VERSION; - req.type = REQ_NEW_CONTROL; - req.sock = *local_addr; -- n = write(pri->control, &req, sizeof(req)); -+ n = os_write_file(pri->control, &req, sizeof(req)); - if(n != sizeof(req)){ -- printk("daemon_open : control setup request returned %d, " -- "errno = %d\n", n, errno); -+ printk("daemon_open : control setup request failed, err = %d\n", -+ -n); - err = -ENOTCONN; - goto out; - } - -- n = read(pri->control, sun, sizeof(*sun)); -+ n = 
os_read_file(pri->control, sun, sizeof(*sun)); - if(n != sizeof(*sun)){ -- printk("daemon_open : read of data socket returned %d, " -- "errno = %d\n", n, errno); -+ printk("daemon_open : read of data socket failed, err = %d\n", -+ -n); - err = -ENOTCONN; - goto out_close; - } -@@ -111,9 +113,9 @@ - return(fd); - - out_close: -- close(fd); -+ os_close_file(fd); - out: -- close(pri->control); -+ os_close_file(pri->control); - return(err); - } - -@@ -153,8 +155,8 @@ - { - struct daemon_data *pri = data; - -- close(pri->fd); -- close(pri->control); -+ os_close_file(pri->fd); -+ os_close_file(pri->control); - if(pri->data_addr != NULL) kfree(pri->data_addr); - if(pri->ctl_addr != NULL) kfree(pri->ctl_addr); - if(pri->local_addr != NULL) kfree(pri->local_addr); -Index: uml-2.6.7/include/asm-um/elf.h -=================================================================== ---- uml-2.6.7.orig/include/asm-um/elf.h 2004-07-16 19:37:00.001024920 +0300 -+++ uml-2.6.7/include/asm-um/elf.h 2004-07-16 19:47:23.789194704 +0300 -@@ -15,4 +15,17 @@ - - #define USE_ELF_CORE_DUMP - -+#define R_386_NONE 0 -+#define R_386_32 1 -+#define R_386_PC32 2 -+#define R_386_GOT32 3 -+#define R_386_PLT32 4 -+#define R_386_COPY 5 -+#define R_386_GLOB_DAT 6 -+#define R_386_JMP_SLOT 7 -+#define R_386_RELATIVE 8 -+#define R_386_GOTOFF 9 -+#define R_386_GOTPC 10 -+#define R_386_NUM 11 -+ - #endif -Index: uml-2.6.7/arch/um/kernel/skas/include/uaccess.h -=================================================================== ---- uml-2.6.7.orig/arch/um/kernel/skas/include/uaccess.h 2004-07-16 19:36:33.265089400 +0300 -+++ uml-2.6.7/arch/um/kernel/skas/include/uaccess.h 2004-07-16 19:47:23.728203976 +0300 -@@ -6,20 +6,12 @@ - #ifndef __SKAS_UACCESS_H - #define __SKAS_UACCESS_H - --#include "linux/string.h" --#include "linux/sched.h" --#include "linux/err.h" --#include "asm/processor.h" --#include "asm/pgtable.h" - #include "asm/errno.h" --#include "asm/current.h" --#include "asm/a.out.h" --#include "kern_util.h" 
- - #define access_ok_skas(type, addr, size) \ - ((segment_eq(get_fs(), KERNEL_DS)) || \ - (((unsigned long) (addr) < TASK_SIZE) && \ -- ((unsigned long) (addr) + (size) < TASK_SIZE))) -+ ((unsigned long) (addr) + (size) <= TASK_SIZE))) - - static inline int verify_area_skas(int type, const void * addr, - unsigned long size) -@@ -27,197 +19,12 @@ - return(access_ok_skas(type, addr, size) ? 0 : -EFAULT); - } - --static inline unsigned long maybe_map(unsigned long virt, int is_write) --{ -- pte_t pte; -- -- void *phys = um_virt_to_phys(current, virt, &pte); -- int dummy_code; -- -- if(IS_ERR(phys) || (is_write && !pte_write(pte))){ -- if(handle_page_fault(virt, 0, is_write, 0, &dummy_code)) -- return(0); -- phys = um_virt_to_phys(current, virt, NULL); -- } -- return((unsigned long) __va((unsigned long) phys)); --} -- --static inline int buffer_op(unsigned long addr, int len, -- int (*op)(unsigned long addr, int len, void *arg), -- void *arg) --{ -- int size = min(PAGE_ALIGN(addr) - addr, (unsigned long) len); -- int remain = len, n; -- -- n = (*op)(addr, size, arg); -- if(n != 0) -- return(n < 0 ? remain : 0); -- -- addr += size; -- remain -= size; -- if(remain == 0) -- return(0); -- -- while(addr < ((addr + remain) & PAGE_MASK)){ -- n = (*op)(addr, PAGE_SIZE, arg); -- if(n != 0) -- return(n < 0 ? remain : 0); -- -- addr += PAGE_SIZE; -- remain -= PAGE_SIZE; -- } -- if(remain == 0) -- return(0); -- -- n = (*op)(addr, remain, arg); -- if(n != 0) -- return(n < 0 ? 
remain : 0); -- return(0); --} -- --static inline int copy_chunk_from_user(unsigned long from, int len, void *arg) --{ -- unsigned long *to_ptr = arg, to = *to_ptr; -- -- from = maybe_map(from, 0); -- if(from == 0) -- return(-1); -- -- memcpy((void *) to, (void *) from, len); -- *to_ptr += len; -- return(0); --} -- --static inline int copy_from_user_skas(void *to, const void *from, int n) --{ -- if(segment_eq(get_fs(), KERNEL_DS)){ -- memcpy(to, from, n); -- return(0); -- } -- -- return(access_ok_skas(VERIFY_READ, from, n) ? -- buffer_op((unsigned long) from, n, copy_chunk_from_user, &to) : -- n); --} -- --static inline int copy_chunk_to_user(unsigned long to, int len, void *arg) --{ -- unsigned long *from_ptr = arg, from = *from_ptr; -- -- to = maybe_map(to, 1); -- if(to == 0) -- return(-1); -- -- memcpy((void *) to, (void *) from, len); -- *from_ptr += len; -- return(0); --} -- --static inline int copy_to_user_skas(void *to, const void *from, int n) --{ -- if(segment_eq(get_fs(), KERNEL_DS)){ -- memcpy(to, from, n); -- return(0); -- } -- -- return(access_ok_skas(VERIFY_WRITE, to, n) ? 
-- buffer_op((unsigned long) to, n, copy_chunk_to_user, &from) : -- n); --} -- --static inline int strncpy_chunk_from_user(unsigned long from, int len, -- void *arg) --{ -- char **to_ptr = arg, *to = *to_ptr; -- int n; -- -- from = maybe_map(from, 0); -- if(from == 0) -- return(-1); -- -- strncpy(to, (void *) from, len); -- n = strnlen(to, len); -- *to_ptr += n; -- -- if(n < len) -- return(1); -- return(0); --} -- --static inline int strncpy_from_user_skas(char *dst, const char *src, int count) --{ -- int n; -- char *ptr = dst; -- -- if(segment_eq(get_fs(), KERNEL_DS)){ -- strncpy(dst, src, count); -- return(strnlen(dst, count)); -- } -- -- if(!access_ok_skas(VERIFY_READ, src, 1)) -- return(-EFAULT); -- -- n = buffer_op((unsigned long) src, count, strncpy_chunk_from_user, -- &ptr); -- if(n != 0) -- return(-EFAULT); -- return(strnlen(dst, count)); --} -- --static inline int clear_chunk(unsigned long addr, int len, void *unused) --{ -- addr = maybe_map(addr, 1); -- if(addr == 0) -- return(-1); -- -- memset((void *) addr, 0, len); -- return(0); --} -- --static inline int __clear_user_skas(void *mem, int len) --{ -- return(buffer_op((unsigned long) mem, len, clear_chunk, NULL)); --} -- --static inline int clear_user_skas(void *mem, int len) --{ -- if(segment_eq(get_fs(), KERNEL_DS)){ -- memset(mem, 0, len); -- return(0); -- } -- -- return(access_ok_skas(VERIFY_WRITE, mem, len) ? 
-- buffer_op((unsigned long) mem, len, clear_chunk, NULL) : len); --} -- --static inline int strnlen_chunk(unsigned long str, int len, void *arg) --{ -- int *len_ptr = arg, n; -- -- str = maybe_map(str, 0); -- if(str == 0) -- return(-1); -- -- n = strnlen((void *) str, len); -- *len_ptr += n; -- -- if(n < len) -- return(1); -- return(0); --} -- --static inline int strnlen_user_skas(const void *str, int len) --{ -- int count = 0, n; -- -- if(segment_eq(get_fs(), KERNEL_DS)) -- return(strnlen(str, len) + 1); -- -- n = buffer_op((unsigned long) str, len, strnlen_chunk, &count); -- if(n == 0) -- return(count + 1); -- return(-EFAULT); --} -+extern int copy_from_user_skas(void *to, const void *from, int n); -+extern int copy_to_user_skas(void *to, const void *from, int n); -+extern int strncpy_from_user_skas(char *dst, const char *src, int count); -+extern int __clear_user_skas(void *mem, int len); -+extern int clear_user_skas(void *mem, int len); -+extern int strnlen_user_skas(const void *str, int len); - - #endif - -Index: uml-2.6.7/arch/um/Kconfig -=================================================================== ---- uml-2.6.7.orig/arch/um/Kconfig 2004-07-16 19:36:40.087052304 +0300 -+++ uml-2.6.7/arch/um/Kconfig 2004-07-16 19:47:23.711206560 +0300 -@@ -61,6 +61,20 @@ - - config NET - bool "Networking support" -+ help -+ Unless you really know what you are doing, you should say Y here. -+ The reason is that some programs need kernel networking support even -+ when running on a stand-alone machine that isn't connected to any -+ other computer. If you are upgrading from an older kernel, you -+ should consider updating your networking tools too because changes -+ in the kernel and the tools often go hand in hand. The tools are -+ contained in the package net-tools, the location and version number -+ of which are given in Documentation/Changes. -+ -+ For a general introduction to Linux networking, it is highly -+ recommended to read the NET-HOWTO, available from -+ . 
-+ - - source "fs/Kconfig.binfmt" - -@@ -85,6 +99,19 @@ - If you'd like to be able to work with files stored on the host, - say Y or M here; otherwise say N. - -+config HPPFS -+ tristate "HoneyPot ProcFS" -+ help -+ hppfs (HoneyPot ProcFS) is a filesystem which allows UML /proc -+ entries to be overridden, removed, or fabricated from the host. -+ Its purpose is to allow a UML to appear to be a physical machine -+ by removing or changing anything in /proc which gives away the -+ identity of a UML. -+ -+ See http://user-mode-linux.sf.net/hppfs.html for more information. -+ -+ You only need this if you are setting up a UML honeypot. Otherwise, -+ it is safe to say 'N' here. - - config MCONSOLE - bool "Management console" -@@ -105,6 +132,16 @@ - config MAGIC_SYSRQ - bool "Magic SysRq key" - depends on MCONSOLE -+ help -+ If you say Y here, you will have some control over the system even -+ if the system crashes for example during kernel debugging (e.g., you -+ will be able to flush the buffer cache to disk, reboot the system -+ immediately or dump some status information). This is accomplished -+ by pressing various keys while holding SysRq (Alt+PrintScreen). It -+ also works on a serial console (on PC hardware at least), if you -+ send a BREAK and then within 5 seconds a command keypress. The -+ keys are documented in Documentation/sysrq.txt. Don't say Y -+ unless you really know what this hack does. - - config HOST_2G_2G - bool "2G/2G host address space split" -@@ -168,6 +205,17 @@ - be 1 << order pages. The default is OK unless you're running Valgrind - on UML, in which case, set this to 3. - -+config UML_REAL_TIME_CLOCK -+ bool "Real-time Clock" -+ default y -+ help -+ This option makes UML time deltas match wall clock deltas. This should -+ normally be enabled. The exception would be if you are debugging with -+ UML and spend long times with UML stopped at a breakpoint. 
In this -+ case, when UML is restarted, it will call the timer enough times to make -+ up for the time spent at the breakpoint. This could result in a -+ noticable lag. If this is a problem, then disable this option. -+ - endmenu - - source "init/Kconfig" -@@ -240,6 +288,10 @@ - config PT_PROXY - bool "Enable ptrace proxy" - depends on XTERM_CHAN && DEBUG_INFO -+ help -+ This option enables a debugging interface which allows gdb to debug -+ the kernel without needing to actually attach to kernel threads. -+ If you want to do kernel debugging, say Y here; otherwise say N. - - config GPROF - bool "Enable gprof support" -Index: uml-2.6.7/arch/um/drivers/hostaudio_kern.c -=================================================================== ---- uml-2.6.7.orig/arch/um/drivers/hostaudio_kern.c 2004-07-16 19:37:40.179916800 +0300 -+++ uml-2.6.7/arch/um/drivers/hostaudio_kern.c 2004-07-16 19:47:24.525082832 +0300 -@@ -5,44 +5,64 @@ - - #include "linux/config.h" - #include "linux/module.h" --#include "linux/version.h" - #include "linux/init.h" - #include "linux/slab.h" - #include "linux/fs.h" - #include "linux/sound.h" - #include "linux/soundcard.h" -+#include "asm/uaccess.h" - #include "kern_util.h" - #include "init.h" --#include "hostaudio.h" -+#include "os.h" -+ -+struct hostaudio_state { -+ int fd; -+}; -+ -+struct hostmixer_state { -+ int fd; -+}; -+ -+#define HOSTAUDIO_DEV_DSP "/dev/sound/dsp" -+#define HOSTAUDIO_DEV_MIXER "/dev/sound/mixer" - - /* Only changed from linux_main at boot time */ - char *dsp = HOSTAUDIO_DEV_DSP; - char *mixer = HOSTAUDIO_DEV_MIXER; - -+#define DSP_HELP \ -+" This is used to specify the host dsp device to the hostaudio driver.\n" \ -+" The default is \"" HOSTAUDIO_DEV_DSP "\".\n\n" -+ -+#define MIXER_HELP \ -+" This is used to specify the host mixer device to the hostaudio driver.\n" \ -+" The default is \"" HOSTAUDIO_DEV_MIXER "\".\n\n" -+ - #ifndef MODULE - static int set_dsp(char *name, int *add) - { -- dsp = uml_strdup(name); -+ dsp = 
name; - return(0); - } - --__uml_setup("dsp=", set_dsp, --"dsp=\n" --" This is used to specify the host dsp device to the hostaudio driver.\n" --" The default is \"" HOSTAUDIO_DEV_DSP "\".\n\n" --); -+__uml_setup("dsp=", set_dsp, "dsp=\n" DSP_HELP); - - static int set_mixer(char *name, int *add) - { -- mixer = uml_strdup(name); -+ mixer = name; - return(0); - } - --__uml_setup("mixer=", set_mixer, --"mixer=\n" --" This is used to specify the host mixer device to the hostaudio driver.\n" --" The default is \"" HOSTAUDIO_DEV_MIXER "\".\n\n" --); -+__uml_setup("mixer=", set_mixer, "mixer=\n" MIXER_HELP); -+ -+#else /*MODULE*/ -+ -+MODULE_PARM(dsp, "s"); -+MODULE_PARM_DESC(dsp, DSP_HELP); -+ -+MODULE_PARM(mixer, "s"); -+MODULE_PARM_DESC(mixer, MIXER_HELP); -+ - #endif - - /* /dev/dsp file operations */ -@@ -51,23 +71,55 @@ - loff_t *ppos) - { - struct hostaudio_state *state = file->private_data; -+ void *kbuf; -+ int ret; - - #ifdef DEBUG - printk("hostaudio: read called, count = %d\n", count); - #endif - -- return(hostaudio_read_user(state, buffer, count, ppos)); -+ kbuf = kmalloc(count, GFP_KERNEL); -+ if(kbuf == NULL) -+ return(-ENOMEM); -+ -+ ret = os_read_file(state->fd, kbuf, count); -+ if(ret < 0) -+ goto out; -+ -+ if(copy_to_user(buffer, kbuf, ret)) -+ ret = -EFAULT; -+ -+ out: -+ kfree(kbuf); -+ return(ret); - } - - static ssize_t hostaudio_write(struct file *file, const char *buffer, - size_t count, loff_t *ppos) - { - struct hostaudio_state *state = file->private_data; -+ void *kbuf; -+ int ret; - - #ifdef DEBUG - printk("hostaudio: write called, count = %d\n", count); - #endif -- return(hostaudio_write_user(state, buffer, count, ppos)); -+ -+ kbuf = kmalloc(count, GFP_KERNEL); -+ if(kbuf == NULL) -+ return(-ENOMEM); -+ -+ ret = -EFAULT; -+ if(copy_from_user(kbuf, buffer, count)) -+ goto out; -+ -+ ret = os_write_file(state->fd, kbuf, count); -+ if(ret < 0) -+ goto out; -+ -+ out: -+ kfree(kbuf); -+ return(ret); - } - - static unsigned int 
hostaudio_poll(struct file *file, -@@ -86,12 +138,43 @@ - unsigned int cmd, unsigned long arg) - { - struct hostaudio_state *state = file->private_data; -+ unsigned long data = 0; -+ int ret; - - #ifdef DEBUG - printk("hostaudio: ioctl called, cmd = %u\n", cmd); - #endif -+ switch(cmd){ -+ case SNDCTL_DSP_SPEED: -+ case SNDCTL_DSP_STEREO: -+ case SNDCTL_DSP_GETBLKSIZE: -+ case SNDCTL_DSP_CHANNELS: -+ case SNDCTL_DSP_SUBDIVIDE: -+ case SNDCTL_DSP_SETFRAGMENT: -+ if(get_user(data, (int *) arg)) -+ return(-EFAULT); -+ break; -+ default: -+ break; -+ } -+ -+ ret = os_ioctl_generic(state->fd, cmd, (unsigned long) &data); -+ -+ switch(cmd){ -+ case SNDCTL_DSP_SPEED: -+ case SNDCTL_DSP_STEREO: -+ case SNDCTL_DSP_GETBLKSIZE: -+ case SNDCTL_DSP_CHANNELS: -+ case SNDCTL_DSP_SUBDIVIDE: -+ case SNDCTL_DSP_SETFRAGMENT: -+ if(put_user(data, (int *) arg)) -+ return(-EFAULT); -+ break; -+ default: -+ break; -+ } - -- return(hostaudio_ioctl_user(state, cmd, arg)); -+ return(ret); - } - - static int hostaudio_open(struct inode *inode, struct file *file) -@@ -110,12 +193,17 @@ - if(file->f_mode & FMODE_READ) r = 1; - if(file->f_mode & FMODE_WRITE) w = 1; - -- ret = hostaudio_open_user(state, r, w, dsp); -+ ret = os_open_file(dsp, of_set_rw(OPENFLAGS(), r, w), 0); -+ - if(ret < 0){ -+ printk("hostaudio_open failed to open '%s', err = %d\n", -+ dsp, -ret); - kfree(state); - return(ret); - } - -+ state->fd = ret; -+ - file->private_data = state; - return(0); - } -@@ -123,16 +211,19 @@ - static int hostaudio_release(struct inode *inode, struct file *file) - { - struct hostaudio_state *state = file->private_data; -- int ret; - - #ifdef DEBUG - printk("hostaudio: release called\n"); - #endif - -- ret = hostaudio_release_user(state); -+ if(state->fd >= 0){ -+ os_close_file(state->fd); -+ state->fd = -1; -+ } -+ - kfree(state); - -- return(ret); -+ return(0); - } - - /* /dev/mixer file operations */ -@@ -146,7 +237,7 @@ - printk("hostmixer: ioctl called\n"); - #endif - -- 
return(hostmixer_ioctl_mixdev_user(state, cmd, arg)); -+ return(os_ioctl_generic(state->fd, cmd, arg)); - } - - static int hostmixer_open_mixdev(struct inode *inode, struct file *file) -@@ -165,13 +256,17 @@ - if(file->f_mode & FMODE_READ) r = 1; - if(file->f_mode & FMODE_WRITE) w = 1; - -- ret = hostmixer_open_mixdev_user(state, r, w, mixer); -+ ret = os_open_file(mixer, of_set_rw(OPENFLAGS(), r, w), 0); - - if(ret < 0){ -+ printk("hostaudio_open_mixdev failed to open '%s', err = %d\n", -+ dsp, -ret); - kfree(state); - return(ret); - } - -+ state->fd = ret; -+ - file->private_data = state; - return(0); - } -@@ -179,16 +274,18 @@ - static int hostmixer_release(struct inode *inode, struct file *file) - { - struct hostmixer_state *state = file->private_data; -- int ret; - - #ifdef DEBUG - printk("hostmixer: release called\n"); - #endif - -- ret = hostmixer_release_mixdev_user(state); -+ if(state->fd >= 0){ -+ os_close_file(state->fd); -+ state->fd = -1; -+ } - kfree(state); - -- return(ret); -+ return(0); - } - - -@@ -225,7 +322,8 @@ - - static int __init hostaudio_init_module(void) - { -- printk(KERN_INFO "UML Audio Relay\n"); -+ printk(KERN_INFO "UML Audio Relay (host dsp = %s, host mixer = %s)\n", -+ dsp, mixer); - - module_data.dev_audio = register_sound_dsp(&hostaudio_fops, -1); - if(module_data.dev_audio < 0){ -Index: uml-2.6.7/arch/um/include/user.h -=================================================================== ---- uml-2.6.7.orig/arch/um/include/user.h 2004-07-16 19:35:55.750792440 +0300 -+++ uml-2.6.7/arch/um/include/user.h 2004-07-16 19:47:23.709206864 +0300 -@@ -14,6 +14,7 @@ - extern void kfree(void *ptr); - extern int in_aton(char *str); - extern int open_gdb_chan(void); -+extern int strlcpy(char *, const char *, int); - - #endif - -Index: uml-2.6.7/arch/um/include/skas_ptrace.h -=================================================================== ---- uml-2.6.7.orig/arch/um/include/skas_ptrace.h 2004-07-16 19:37:26.080060304 +0300 -+++ 
uml-2.6.7/arch/um/include/skas_ptrace.h 2004-07-16 19:47:23.706207320 +0300 -@@ -1,5 +1,5 @@ - /* -- * Copyright (C) 2002 Jeff Dike (jdike@karaya.com) -+ * Copyright (C) 2000, 2001, 2002 Jeff Dike (jdike@karaya.com) - * Licensed under the GPL - */ - -Index: uml-2.6.7/arch/um/kernel/tt/ptproxy/sysdep.c -=================================================================== ---- uml-2.6.7.orig/arch/um/kernel/tt/ptproxy/sysdep.c 2004-07-16 19:37:13.940905736 +0300 -+++ uml-2.6.7/arch/um/kernel/tt/ptproxy/sysdep.c 2004-07-16 19:47:23.747201088 +0300 -@@ -9,6 +9,7 @@ - #include - #include - #include -+#include - #include - #include - #include -Index: uml-2.6.7/arch/um/kernel/tt/ptproxy/proxy.c -=================================================================== ---- uml-2.6.7.orig/arch/um/kernel/tt/ptproxy/proxy.c 2004-07-16 19:36:56.795512232 +0300 -+++ uml-2.6.7/arch/um/kernel/tt/ptproxy/proxy.c 2004-07-16 19:47:24.800041032 +0300 -@@ -15,7 +15,6 @@ - #include - #include - #include --#include - #include - #include - #include -@@ -273,7 +272,7 @@ - - child_proxy(1, W_EXITCODE(0, 0)); - while(debugger.waiting == 1){ -- pid = waitpid(debugger.pid, &status, WUNTRACED); -+ CATCH_EINTR(pid = waitpid(debugger.pid, &status, WUNTRACED)); - if(pid != debugger.pid){ - printk("fake_child_exit - waitpid failed, " - "errno = %d\n", errno); -@@ -281,7 +280,7 @@ - } - debugger_proxy(status, debugger.pid); - } -- pid = waitpid(debugger.pid, &status, WUNTRACED); -+ CATCH_EINTR(pid = waitpid(debugger.pid, &status, WUNTRACED)); - if(pid != debugger.pid){ - printk("fake_child_exit - waitpid failed, " - "errno = %d\n", errno); -@@ -293,10 +292,10 @@ - } - - char gdb_init_string[] = --"att 1 --b panic --b stop --handle SIGWINCH nostop noprint pass -+"att 1 \n\ -+b panic \n\ -+b stop \n\ -+handle SIGWINCH nostop noprint pass \n\ - "; - - int start_debugger(char *prog, int startup, int stop, int *fd_out) -@@ -304,7 +303,8 @@ - int slave, child; - - slave = open_gdb_chan(); -- if((child = fork()) 
== 0){ -+ child = fork(); -+ if(child == 0){ - char *tempname = NULL; - int fd; - -@@ -327,18 +327,19 @@ - exit(1); - #endif - } -- if((fd = make_tempfile("/tmp/gdb_init-XXXXXX", &tempname, 0)) < 0){ -- printk("start_debugger : make_tempfile failed, errno = %d\n", -- errno); -+ fd = make_tempfile("/tmp/gdb_init-XXXXXX", &tempname, 0); -+ if(fd < 0){ -+ printk("start_debugger : make_tempfile failed," -+ "err = %d\n", -fd); - exit(1); - } -- write(fd, gdb_init_string, sizeof(gdb_init_string) - 1); -+ os_write_file(fd, gdb_init_string, sizeof(gdb_init_string) - 1); - if(startup){ - if(stop){ -- write(fd, "b start_kernel\n", -+ os_write_file(fd, "b start_kernel\n", - strlen("b start_kernel\n")); - } -- write(fd, "c\n", strlen("c\n")); -+ os_write_file(fd, "c\n", strlen("c\n")); - } - if(ptrace(PTRACE_TRACEME, 0, 0, 0) < 0){ - printk("start_debugger : PTRACE_TRACEME failed, " -Index: uml-2.6.7/arch/um/kernel/tt/include/uaccess.h -=================================================================== ---- uml-2.6.7.orig/arch/um/kernel/tt/include/uaccess.h 2004-07-16 19:37:00.052017168 +0300 -+++ uml-2.6.7/arch/um/kernel/tt/include/uaccess.h 2004-07-16 19:47:23.744201544 +0300 -@@ -1,5 +1,5 @@ - /* -- * Copyright (C) 2000, 2001 Jeff Dike (jdike@karaya.com) -+ * Copyright (C) 2000 - 2003 Jeff Dike (jdike@addtoit.com) - * Licensed under the GPL - */ - -@@ -43,65 +43,19 @@ - - extern int __do_copy_from_user(void *to, const void *from, int n, - void **fault_addr, void **fault_catcher); -- --static inline int copy_from_user_tt(void *to, const void *from, int n) --{ -- return(access_ok_tt(VERIFY_READ, from, n) ? -- __do_copy_from_user(to, from, n, -- ¤t->thread.fault_addr, -- ¤t->thread.fault_catcher) : n); --} -- --static inline int copy_to_user_tt(void *to, const void *from, int n) --{ -- return(access_ok_tt(VERIFY_WRITE, to, n) ? 
-- __do_copy_to_user(to, from, n, -- ¤t->thread.fault_addr, -- ¤t->thread.fault_catcher) : n); --} -- - extern int __do_strncpy_from_user(char *dst, const char *src, size_t n, - void **fault_addr, void **fault_catcher); -- --static inline int strncpy_from_user_tt(char *dst, const char *src, int count) --{ -- int n; -- -- if(!access_ok_tt(VERIFY_READ, src, 1)) return(-EFAULT); -- n = __do_strncpy_from_user(dst, src, count, -- ¤t->thread.fault_addr, -- ¤t->thread.fault_catcher); -- if(n < 0) return(-EFAULT); -- return(n); --} -- - extern int __do_clear_user(void *mem, size_t len, void **fault_addr, - void **fault_catcher); -- --static inline int __clear_user_tt(void *mem, int len) --{ -- return(__do_clear_user(mem, len, -- ¤t->thread.fault_addr, -- ¤t->thread.fault_catcher)); --} -- --static inline int clear_user_tt(void *mem, int len) --{ -- return(access_ok_tt(VERIFY_WRITE, mem, len) ? -- __do_clear_user(mem, len, -- ¤t->thread.fault_addr, -- ¤t->thread.fault_catcher) : len); --} -- - extern int __do_strnlen_user(const char *str, unsigned long n, - void **fault_addr, void **fault_catcher); - --static inline int strnlen_user_tt(const void *str, int len) --{ -- return(__do_strnlen_user(str, len, -- ¤t->thread.fault_addr, -- ¤t->thread.fault_catcher)); --} -+extern int copy_from_user_tt(void *to, const void *from, int n); -+extern int copy_to_user_tt(void *to, const void *from, int n); -+extern int strncpy_from_user_tt(char *dst, const char *src, int count); -+extern int __clear_user_tt(void *mem, int len); -+extern int clear_user_tt(void *mem, int len); -+extern int strnlen_user_tt(const void *str, int len); - - #endif - -Index: uml-2.6.7/arch/um/drivers/null.c -=================================================================== ---- uml-2.6.7.orig/arch/um/drivers/null.c 2004-07-16 19:35:59.203267584 +0300 -+++ uml-2.6.7/arch/um/drivers/null.c 2004-07-16 19:47:23.690209752 +0300 -@@ -5,7 +5,6 @@ - - #include - #include --#include - #include "chan_user.h" - #include 
"os.h" - -Index: uml-2.6.7/arch/um/kernel/frame_kern.c -=================================================================== ---- uml-2.6.7.orig/arch/um/kernel/frame_kern.c 2004-07-16 19:37:51.994120768 +0300 -+++ uml-2.6.7/arch/um/kernel/frame_kern.c 2004-07-16 19:47:23.714206104 +0300 -@@ -6,7 +6,6 @@ - #include "asm/ptrace.h" - #include "asm/uaccess.h" - #include "asm/signal.h" --#include "asm/uaccess.h" - #include "asm/ucontext.h" - #include "frame_kern.h" - #include "sigcontext.h" -@@ -29,12 +28,15 @@ - sizeof(restorer))); - } - -+extern int userspace_pid[]; -+ - static int copy_sc_to_user(void *to, void *fp, struct pt_regs *from, - struct arch_frame_data *arch) - { - return(CHOOSE_MODE(copy_sc_to_user_tt(to, fp, UPT_SC(&from->regs), - arch), -- copy_sc_to_user_skas(to, fp, &from->regs, -+ copy_sc_to_user_skas(userspace_pid[0], to, fp, -+ &from->regs, - current->thread.cr2, - current->thread.err))); - } -Index: uml-2.6.7/arch/um/include/mem.h -=================================================================== ---- uml-2.6.7.orig/arch/um/include/mem.h 2004-07-16 19:37:46.189003280 +0300 -+++ uml-2.6.7/arch/um/include/mem.h 2004-07-16 19:47:23.703207776 +0300 -@@ -1,19 +1,18 @@ - /* -- * Copyright (C) 2002 Jeff Dike (jdike@karaya.com) -+ * Copyright (C) 2002, 2003 Jeff Dike (jdike@addtoit.com) - * Licensed under the GPL - */ - - #ifndef __MEM_H__ - #define __MEM_H__ - --struct vm_reserved { -- struct list_head list; -- unsigned long start; -- unsigned long end; --}; -+#include "linux/types.h" - --extern void set_usable_vm(unsigned long start, unsigned long end); --extern void set_kmem_end(unsigned long new); -+extern int phys_mapping(unsigned long phys, __u64 *offset_out); -+extern int physmem_subst_mapping(void *virt, int fd, __u64 offset, int w); -+extern int is_remapped(void *virt); -+extern int physmem_remove_mapping(void *virt); -+extern void physmem_forget_descriptor(int fd); - - #endif - -Index: uml-2.6.7/arch/um/include/ubd_user.h 
-=================================================================== ---- uml-2.6.7.orig/arch/um/include/ubd_user.h 2004-07-16 19:36:48.255810464 +0300 -+++ uml-2.6.7/arch/um/include/ubd_user.h 2004-07-16 19:47:23.708207016 +0300 -@@ -9,7 +9,7 @@ - - #include "os.h" - --enum ubd_req { UBD_READ, UBD_WRITE }; -+enum ubd_req { UBD_READ, UBD_WRITE, UBD_MMAP }; - - struct io_thread_req { - enum ubd_req op; -@@ -20,8 +20,10 @@ - char *buffer; - int sectorsize; - unsigned long sector_mask; -- unsigned long cow_offset; -+ unsigned long long cow_offset; - unsigned long bitmap_words[2]; -+ int map_fd; -+ unsigned long long map_offset; - int error; - }; - -@@ -31,7 +33,7 @@ - int *create_cow_out); - extern int create_cow_file(char *cow_file, char *backing_file, - struct openflags flags, int sectorsize, -- int *bitmap_offset_out, -+ int alignment, int *bitmap_offset_out, - unsigned long *bitmap_len_out, - int *data_offset_out); - extern int read_cow_bitmap(int fd, void *buf, int offset, int len); -@@ -39,7 +41,6 @@ - extern int write_ubd_fs(int fd, char *buffer, int len); - extern int start_io_thread(unsigned long sp, int *fds_out); - extern void do_io(struct io_thread_req *req); --extern int ubd_is_dir(char *file); - - static inline int ubd_test_bit(__u64 bit, unsigned char *data) - { -Index: uml-2.6.7/arch/um/uml.lds.S -=================================================================== ---- uml-2.6.7.orig/arch/um/uml.lds.S 2004-07-16 19:36:36.401612576 +0300 -+++ uml-2.6.7/arch/um/uml.lds.S 2004-07-16 19:47:23.780196072 +0300 -@@ -9,7 +9,6 @@ - { - . = START + SIZEOF_HEADERS; - -- . = ALIGN(4096); - __binary_start = .; - #ifdef MODE_TT - .thread_private : { -@@ -26,11 +25,16 @@ - . = ALIGN(4096); /* Init code and data */ - _stext = .; - __init_begin = .; -- .text.init : { *(.text.init) } -+ .init.text : { -+ _sinittext = .; -+ *(.init.text) -+ _einittext = .; -+ } - . 
= ALIGN(4096); - .text : - { - *(.text) -+ SCHED_TEXT - /* .gnu.warning sections are handled specially by elf32.em. */ - *(.gnu.warning) - *(.gnu.linkonce.t*) -@@ -38,7 +42,7 @@ - - #include "asm/common.lds.S" - -- .data.init : { *(.data.init) } -+ init.data : { *(init.data) } - .data : - { - . = ALIGN(KERNEL_STACK_SIZE); /* init_task */ -Index: uml-2.6.7/arch/um/os-Linux/drivers/ethertap_user.c -=================================================================== ---- uml-2.6.7.orig/arch/um/os-Linux/drivers/ethertap_user.c 2004-07-16 19:36:42.453692520 +0300 -+++ uml-2.6.7/arch/um/os-Linux/drivers/ethertap_user.c 2004-07-16 19:47:24.801040880 +0300 -@@ -8,7 +8,6 @@ - #include - #include - #include --#include - #include - #include - #include -@@ -17,6 +16,7 @@ - #include - #include "user.h" - #include "kern_util.h" -+#include "user_util.h" - #include "net_user.h" - #include "etap.h" - #include "helper.h" -@@ -42,13 +42,14 @@ - { - struct addr_change change; - void *output; -+ int n; - - change.what = op; - memcpy(change.addr, addr, sizeof(change.addr)); - memcpy(change.netmask, netmask, sizeof(change.netmask)); -- if(write(fd, &change, sizeof(change)) != sizeof(change)) -- printk("etap_change - request failed, errno = %d\n", -- errno); -+ n = os_write_file(fd, &change, sizeof(change)); -+ if(n != sizeof(change)) -+ printk("etap_change - request failed, err = %d\n", -n); - output = um_kmalloc(page_size()); - if(output == NULL) - printk("etap_change : Failed to allocate output buffer\n"); -@@ -82,15 +83,15 @@ - struct etap_pre_exec_data *data = arg; - - dup2(data->control_remote, 1); -- close(data->data_me); -- close(data->control_me); -+ os_close_file(data->data_me); -+ os_close_file(data->control_me); - } - - static int etap_tramp(char *dev, char *gate, int control_me, - int control_remote, int data_me, int data_remote) - { - struct etap_pre_exec_data pe_data; -- int pid, status, err; -+ int pid, status, err, n; - char version_buf[sizeof("nnnnn\0")]; - char 
data_fd_buf[sizeof("nnnnnn\0")]; - char gate_buf[sizeof("nnn.nnn.nnn.nnn\0")]; -@@ -114,21 +115,22 @@ - pe_data.data_me = data_me; - pid = run_helper(etap_pre_exec, &pe_data, args, NULL); - -- if(pid < 0) err = errno; -- close(data_remote); -- close(control_remote); -- if(read(control_me, &c, sizeof(c)) != sizeof(c)){ -- printk("etap_tramp : read of status failed, errno = %d\n", -- errno); -- return(EINVAL); -+ if(pid < 0) err = pid; -+ os_close_file(data_remote); -+ os_close_file(control_remote); -+ n = os_read_file(control_me, &c, sizeof(c)); -+ if(n != sizeof(c)){ -+ printk("etap_tramp : read of status failed, err = %d\n", -n); -+ return(-EINVAL); - } - if(c != 1){ - printk("etap_tramp : uml_net failed\n"); -- err = EINVAL; -- if(waitpid(pid, &status, 0) < 0) err = errno; -- else if(!WIFEXITED(status) || (WEXITSTATUS(status) != 1)){ -+ err = -EINVAL; -+ CATCH_EINTR(n = waitpid(pid, &status, 0)); -+ if(n < 0) -+ err = -errno; -+ else if(!WIFEXITED(status) || (WEXITSTATUS(status) != 1)) - printk("uml_net didn't exit with status 1\n"); -- } - } - return(err); - } -@@ -143,14 +145,14 @@ - if(err) return(err); - - err = os_pipe(data_fds, 0, 0); -- if(err){ -- printk("data os_pipe failed - errno = %d\n", -err); -+ if(err < 0){ -+ printk("data os_pipe failed - err = %d\n", -err); - return(err); - } - - err = os_pipe(control_fds, 1, 0); -- if(err){ -- printk("control os_pipe failed - errno = %d\n", -err); -+ if(err < 0){ -+ printk("control os_pipe failed - err = %d\n", -err); - return(err); - } - -@@ -167,9 +169,9 @@ - kfree(output); - } - -- if(err != 0){ -- printk("etap_tramp failed - errno = %d\n", err); -- return(-err); -+ if(err < 0){ -+ printk("etap_tramp failed - err = %d\n", -err); -+ return(err); - } - - pri->data_fd = data_fds[0]; -@@ -183,11 +185,11 @@ - struct ethertap_data *pri = data; - - iter_addresses(pri->dev, etap_close_addr, &pri->control_fd); -- close(fd); -+ os_close_file(fd); - os_shutdown_socket(pri->data_fd, 1, 1); -- close(pri->data_fd); -+ 
os_close_file(pri->data_fd); - pri->data_fd = -1; -- close(pri->control_fd); -+ os_close_file(pri->control_fd); - pri->control_fd = -1; - } - -Index: uml-2.6.7/arch/um/drivers/ubd_user.c -=================================================================== ---- uml-2.6.7.orig/arch/um/drivers/ubd_user.c 2004-07-16 19:36:13.849041088 +0300 -+++ uml-2.6.7/arch/um/drivers/ubd_user.c 2004-07-16 19:47:23.698208536 +0300 -@@ -11,11 +11,8 @@ - #include - #include - #include --#include - #include --#include - #include --#include - #include - #include - #include "asm/types.h" -@@ -24,146 +21,30 @@ - #include "user.h" - #include "ubd_user.h" - #include "os.h" -+#include "cow.h" - - #include - #include --#if __BYTE_ORDER == __BIG_ENDIAN --# define ntohll(x) (x) --# define htonll(x) (x) --#elif __BYTE_ORDER == __LITTLE_ENDIAN --# define ntohll(x) bswap_64(x) --# define htonll(x) bswap_64(x) --#else --#error "__BYTE_ORDER not defined" --#endif -- --#define PATH_LEN_V1 256 -- --struct cow_header_v1 { -- int magic; -- int version; -- char backing_file[PATH_LEN_V1]; -- time_t mtime; -- __u64 size; -- int sectorsize; --}; -- --#define PATH_LEN_V2 MAXPATHLEN -- --struct cow_header_v2 { -- unsigned long magic; -- unsigned long version; -- char backing_file[PATH_LEN_V2]; -- time_t mtime; -- __u64 size; -- int sectorsize; --}; -- --union cow_header { -- struct cow_header_v1 v1; -- struct cow_header_v2 v2; --}; -- --#define COW_MAGIC 0x4f4f4f4d /* MOOO */ --#define COW_VERSION 2 -- --static void sizes(__u64 size, int sectorsize, int bitmap_offset, -- unsigned long *bitmap_len_out, int *data_offset_out) --{ -- *bitmap_len_out = (size + sectorsize - 1) / (8 * sectorsize); -- -- *data_offset_out = bitmap_offset + *bitmap_len_out; -- *data_offset_out = (*data_offset_out + sectorsize - 1) / sectorsize; -- *data_offset_out *= sectorsize; --} -- --static int read_cow_header(int fd, int *magic_out, char **backing_file_out, -- time_t *mtime_out, __u64 *size_out, -- int *sectorsize_out, int 
*bitmap_offset_out) --{ -- union cow_header *header; -- char *file; -- int err, n; -- unsigned long version, magic; -- -- header = um_kmalloc(sizeof(*header)); -- if(header == NULL){ -- printk("read_cow_header - Failed to allocate header\n"); -- return(-ENOMEM); -- } -- err = -EINVAL; -- n = read(fd, header, sizeof(*header)); -- if(n < offsetof(typeof(header->v1), backing_file)){ -- printk("read_cow_header - short header\n"); -- goto out; -- } -- -- magic = header->v1.magic; -- if(magic == COW_MAGIC) { -- version = header->v1.version; -- } -- else if(magic == ntohl(COW_MAGIC)){ -- version = ntohl(header->v1.version); -- } -- else goto out; -- -- *magic_out = COW_MAGIC; -- -- if(version == 1){ -- if(n < sizeof(header->v1)){ -- printk("read_cow_header - failed to read V1 header\n"); -- goto out; -- } -- *mtime_out = header->v1.mtime; -- *size_out = header->v1.size; -- *sectorsize_out = header->v1.sectorsize; -- *bitmap_offset_out = sizeof(header->v1); -- file = header->v1.backing_file; -- } -- else if(version == 2){ -- if(n < sizeof(header->v2)){ -- printk("read_cow_header - failed to read V2 header\n"); -- goto out; -- } -- *mtime_out = ntohl(header->v2.mtime); -- *size_out = ntohll(header->v2.size); -- *sectorsize_out = ntohl(header->v2.sectorsize); -- *bitmap_offset_out = sizeof(header->v2); -- file = header->v2.backing_file; -- } -- else { -- printk("read_cow_header - invalid COW version\n"); -- goto out; -- } -- err = -ENOMEM; -- *backing_file_out = uml_strdup(file); -- if(*backing_file_out == NULL){ -- printk("read_cow_header - failed to allocate backing file\n"); -- goto out; -- } -- err = 0; -- out: -- kfree(header); -- return(err); --} - - static int same_backing_files(char *from_cmdline, char *from_cow, char *cow) - { -- struct stat buf1, buf2; -+ struct uml_stat buf1, buf2; -+ int err; - - if(from_cmdline == NULL) return(1); - if(!strcmp(from_cmdline, from_cow)) return(1); - -- if(stat(from_cmdline, &buf1) < 0){ -- printk("Couldn't stat '%s', errno = 
%d\n", from_cmdline, -- errno); -+ err = os_stat_file(from_cmdline, &buf1); -+ if(err < 0){ -+ printk("Couldn't stat '%s', err = %d\n", from_cmdline, -err); - return(1); - } -- if(stat(from_cow, &buf2) < 0){ -- printk("Couldn't stat '%s', errno = %d\n", from_cow, errno); -+ err = os_stat_file(from_cow, &buf2); -+ if(err < 0){ -+ printk("Couldn't stat '%s', err = %d\n", from_cow, -err); - return(1); - } -- if((buf1.st_dev == buf2.st_dev) && (buf1.st_ino == buf2.st_ino)) -+ if((buf1.ust_dev == buf2.ust_dev) && (buf1.ust_ino == buf2.ust_ino)) - return(1); - - printk("Backing file mismatch - \"%s\" requested,\n" -@@ -174,20 +55,21 @@ - - static int backing_file_mismatch(char *file, __u64 size, time_t mtime) - { -- struct stat64 buf; -+ unsigned long modtime; - long long actual; - int err; - -- if(stat64(file, &buf) < 0){ -- printk("Failed to stat backing file \"%s\", errno = %d\n", -- file, errno); -- return(-errno); -+ err = os_file_modtime(file, &modtime); -+ if(err < 0){ -+ printk("Failed to get modification time of backing file " -+ "\"%s\", err = %d\n", file, -err); -+ return(err); - } - - err = os_file_size(file, &actual); -- if(err){ -+ if(err < 0){ - printk("Failed to get size of backing file \"%s\", " -- "errno = %d\n", file, -err); -+ "err = %d\n", file, -err); - return(err); - } - -@@ -196,9 +78,9 @@ - "file\n", size, actual); - return(-EINVAL); - } -- if(buf.st_mtime != mtime){ -+ if(modtime != mtime){ - printk("mtime mismatch (%ld vs %ld) of COW header vs backing " -- "file\n", mtime, buf.st_mtime); -+ "file\n", mtime, modtime); - return(-EINVAL); - } - return(0); -@@ -209,124 +91,16 @@ - int err; - - err = os_seek_file(fd, offset); -- if(err != 0) return(-errno); -- err = read(fd, buf, len); -- if(err < 0) return(-errno); -- return(0); --} -+ if(err < 0) -+ return(err); - --static int absolutize(char *to, int size, char *from) --{ -- char save_cwd[256], *slash; -- int remaining; -+ err = os_read_file(fd, buf, len); -+ if(err < 0) -+ return(err); - -- 
if(getcwd(save_cwd, sizeof(save_cwd)) == NULL) { -- printk("absolutize : unable to get cwd - errno = %d\n", errno); -- return(-1); -- } -- slash = strrchr(from, '/'); -- if(slash != NULL){ -- *slash = '\0'; -- if(chdir(from)){ -- *slash = '/'; -- printk("absolutize : Can't cd to '%s' - errno = %d\n", -- from, errno); -- return(-1); -- } -- *slash = '/'; -- if(getcwd(to, size) == NULL){ -- printk("absolutize : unable to get cwd of '%s' - " -- "errno = %d\n", from, errno); -- return(-1); -- } -- remaining = size - strlen(to); -- if(strlen(slash) + 1 > remaining){ -- printk("absolutize : unable to fit '%s' into %d " -- "chars\n", from, size); -- return(-1); -- } -- strcat(to, slash); -- } -- else { -- if(strlen(save_cwd) + 1 + strlen(from) + 1 > size){ -- printk("absolutize : unable to fit '%s' into %d " -- "chars\n", from, size); -- return(-1); -- } -- strcpy(to, save_cwd); -- strcat(to, "/"); -- strcat(to, from); -- } -- chdir(save_cwd); - return(0); - } - --static int write_cow_header(char *cow_file, int fd, char *backing_file, -- int sectorsize, long long *size) --{ -- struct cow_header_v2 *header; -- struct stat64 buf; -- int err; -- -- err = os_seek_file(fd, 0); -- if(err != 0){ -- printk("write_cow_header - lseek failed, errno = %d\n", errno); -- return(-errno); -- } -- -- err = -ENOMEM; -- header = um_kmalloc(sizeof(*header)); -- if(header == NULL){ -- printk("Failed to allocate COW V2 header\n"); -- goto out; -- } -- header->magic = htonl(COW_MAGIC); -- header->version = htonl(COW_VERSION); -- -- err = -EINVAL; -- if(strlen(backing_file) > sizeof(header->backing_file) - 1){ -- printk("Backing file name \"%s\" is too long - names are " -- "limited to %d characters\n", backing_file, -- sizeof(header->backing_file) - 1); -- goto out_free; -- } -- -- if(absolutize(header->backing_file, sizeof(header->backing_file), -- backing_file)) -- goto out_free; -- -- err = stat64(header->backing_file, &buf); -- if(err < 0){ -- printk("Stat of backing file '%s' failed, errno 
= %d\n", -- header->backing_file, errno); -- err = -errno; -- goto out_free; -- } -- -- err = os_file_size(header->backing_file, size); -- if(err){ -- printk("Couldn't get size of backing file '%s', errno = %d\n", -- header->backing_file, -*size); -- goto out_free; -- } -- -- header->mtime = htonl(buf.st_mtime); -- header->size = htonll(*size); -- header->sectorsize = htonl(sectorsize); -- -- err = write(fd, header, sizeof(*header)); -- if(err != sizeof(*header)){ -- printk("Write of header to new COW file '%s' failed, " -- "errno = %d\n", cow_file, errno); -- goto out_free; -- } -- err = 0; -- out_free: -- kfree(header); -- out: -- return(err); --} -- - int open_ubd_file(char *file, struct openflags *openflags, - char **backing_file_out, int *bitmap_offset_out, - unsigned long *bitmap_len_out, int *data_offset_out, -@@ -334,26 +108,36 @@ - { - time_t mtime; - __u64 size; -+ __u32 version, align; - char *backing_file; -- int fd, err, sectorsize, magic, same, mode = 0644; -+ int fd, err, sectorsize, same, mode = 0644; - -- if((fd = os_open_file(file, *openflags, mode)) < 0){ -+ fd = os_open_file(file, *openflags, mode); -+ if(fd < 0){ - if((fd == -ENOENT) && (create_cow_out != NULL)) - *create_cow_out = 1; - if(!openflags->w || - ((errno != EROFS) && (errno != EACCES))) return(-errno); - openflags->w = 0; -- if((fd = os_open_file(file, *openflags, mode)) < 0) -+ fd = os_open_file(file, *openflags, mode); -+ if(fd < 0) - return(fd); - } -+ -+ err = os_lock_file(fd, openflags->w); -+ if(err < 0){ -+ printk("Failed to lock '%s', err = %d\n", file, -err); -+ goto out_close; -+ } -+ - if(backing_file_out == NULL) return(fd); - -- err = read_cow_header(fd, &magic, &backing_file, &mtime, &size, -- §orsize, bitmap_offset_out); -+ err = read_cow_header(file_reader, &fd, &version, &backing_file, &mtime, -+ &size, §orsize, &align, bitmap_offset_out); - if(err && (*backing_file_out != NULL)){ - printk("Failed to read COW header from COW file \"%s\", " -- "errno = %d\n", file, 
err); -- goto error; -+ "errno = %d\n", file, -err); -+ goto out_close; - } - if(err) return(fd); - -@@ -363,36 +147,33 @@ - - if(!same && !backing_file_mismatch(*backing_file_out, size, mtime)){ - printk("Switching backing file to '%s'\n", *backing_file_out); -- err = write_cow_header(file, fd, *backing_file_out, -- sectorsize, &size); -+ err = write_cow_header(file, fd, *backing_file_out, -+ sectorsize, align, &size); - if(err){ -- printk("Switch failed, errno = %d\n", err); -+ printk("Switch failed, errno = %d\n", -err); - return(err); - } - } - else { - *backing_file_out = backing_file; - err = backing_file_mismatch(*backing_file_out, size, mtime); -- if(err) goto error; -+ if(err) goto out_close; - } - -- sizes(size, sectorsize, *bitmap_offset_out, bitmap_len_out, -- data_offset_out); -+ cow_sizes(version, size, sectorsize, align, *bitmap_offset_out, -+ bitmap_len_out, data_offset_out); - - return(fd); -- error: -- close(fd); -+ out_close: -+ os_close_file(fd); - return(err); - } - - int create_cow_file(char *cow_file, char *backing_file, struct openflags flags, -- int sectorsize, int *bitmap_offset_out, -+ int sectorsize, int alignment, int *bitmap_offset_out, - unsigned long *bitmap_len_out, int *data_offset_out) - { -- __u64 blocks; -- long zero; -- int err, fd, i; -- long long size; -+ int err, fd; - - flags.c = 1; - fd = open_ubd_file(cow_file, &flags, NULL, NULL, NULL, NULL, NULL); -@@ -403,57 +184,49 @@ - goto out; - } - -- err = write_cow_header(cow_file, fd, backing_file, sectorsize, &size); -- if(err) goto out_close; -- -- blocks = (size + sectorsize - 1) / sectorsize; -- blocks = (blocks + sizeof(long) * 8 - 1) / (sizeof(long) * 8); -- zero = 0; -- for(i = 0; i < blocks; i++){ -- err = write(fd, &zero, sizeof(zero)); -- if(err != sizeof(zero)){ -- printk("Write of bitmap to new COW file '%s' failed, " -- "errno = %d\n", cow_file, errno); -- goto out_close; -- } -- } -- -- sizes(size, sectorsize, sizeof(struct cow_header_v2), -- bitmap_len_out, 
data_offset_out); -- *bitmap_offset_out = sizeof(struct cow_header_v2); -- -- return(fd); -- -- out_close: -- close(fd); -+ err = init_cow_file(fd, cow_file, backing_file, sectorsize, alignment, -+ bitmap_offset_out, bitmap_len_out, -+ data_offset_out); -+ if(!err) -+ return(fd); -+ os_close_file(fd); - out: - return(err); - } - -+/* XXX Just trivial wrappers around os_read_file and os_write_file */ - int read_ubd_fs(int fd, void *buffer, int len) - { -- int n; -- -- n = read(fd, buffer, len); -- if(n < 0) return(-errno); -- else return(n); -+ return(os_read_file(fd, buffer, len)); - } - - int write_ubd_fs(int fd, char *buffer, int len) - { -- int n; -- -- n = write(fd, buffer, len); -- if(n < 0) return(-errno); -- else return(n); -+ return(os_write_file(fd, buffer, len)); - } - --int ubd_is_dir(char *file) -+static int update_bitmap(struct io_thread_req *req) - { -- struct stat64 buf; -+ int n; -+ -+ if(req->cow_offset == -1) -+ return(0); -+ -+ n = os_seek_file(req->fds[1], req->cow_offset); -+ if(n < 0){ -+ printk("do_io - bitmap lseek failed : err = %d\n", -n); -+ return(1); -+ } -+ -+ n = os_write_file(req->fds[1], &req->bitmap_words, -+ sizeof(req->bitmap_words)); -+ if(n != sizeof(req->bitmap_words)){ -+ printk("do_io - bitmap update failed, err = %d fd = %d\n", -n, -+ req->fds[1]); -+ return(1); -+ } - -- if(stat64(file, &buf) < 0) return(0); -- return(S_ISDIR(buf.st_mode)); -+ return(0); - } - - void do_io(struct io_thread_req *req) -@@ -461,8 +234,18 @@ - char *buf; - unsigned long len; - int n, nsectors, start, end, bit; -+ int err; - __u64 off; - -+ if(req->op == UBD_MMAP){ -+ /* Touch the page to force the host to do any necessary IO to -+ * get it into memory -+ */ -+ n = *((volatile int *) req->buffer); -+ req->error = update_bitmap(req); -+ return; -+ } -+ - nsectors = req->length / req->sectorsize; - start = 0; - do { -@@ -473,15 +256,14 @@ - &req->sector_mask) == bit)) - end++; - -- if(end != nsectors) -- printk("end != nsectors\n"); - off = 
req->offset + req->offsets[bit] + - start * req->sectorsize; - len = (end - start) * req->sectorsize; - buf = &req->buffer[start * req->sectorsize]; - -- if(os_seek_file(req->fds[bit], off) != 0){ -- printk("do_io - lseek failed : errno = %d\n", errno); -+ err = os_seek_file(req->fds[bit], off); -+ if(err < 0){ -+ printk("do_io - lseek failed : err = %d\n", -err); - req->error = 1; - return; - } -@@ -490,11 +272,10 @@ - do { - buf = &buf[n]; - len -= n; -- n = read(req->fds[bit], buf, len); -+ n = os_read_file(req->fds[bit], buf, len); - if (n < 0) { -- printk("do_io - read returned %d : " -- "errno = %d fd = %d\n", n, -- errno, req->fds[bit]); -+ printk("do_io - read failed, err = %d " -+ "fd = %d\n", -n, req->fds[bit]); - req->error = 1; - return; - } -@@ -502,11 +283,10 @@ - if (n < len) memset(&buf[n], 0, len - n); - } - else { -- n = write(req->fds[bit], buf, len); -+ n = os_write_file(req->fds[bit], buf, len); - if(n != len){ -- printk("do_io - write returned %d : " -- "errno = %d fd = %d\n", n, -- errno, req->fds[bit]); -+ printk("do_io - write failed err = %d " -+ "fd = %d\n", -n, req->fds[bit]); - req->error = 1; - return; - } -@@ -515,24 +295,7 @@ - start = end; - } while(start < nsectors); - -- if(req->cow_offset != -1){ -- if(os_seek_file(req->fds[1], req->cow_offset) != 0){ -- printk("do_io - bitmap lseek failed : errno = %d\n", -- errno); -- req->error = 1; -- return; -- } -- n = write(req->fds[1], &req->bitmap_words, -- sizeof(req->bitmap_words)); -- if(n != sizeof(req->bitmap_words)){ -- printk("do_io - bitmap update returned %d : " -- "errno = %d fd = %d\n", n, errno, req->fds[1]); -- req->error = 1; -- return; -- } -- } -- req->error = 0; -- return; -+ req->error = update_bitmap(req); - } - - /* Changed in start_io_thread, which is serialized by being called only -@@ -550,19 +313,23 @@ - - signal(SIGWINCH, SIG_IGN); - while(1){ -- n = read(kernel_fd, &req, sizeof(req)); -- if(n < 0) printk("io_thread - read returned %d, errno = %d\n", -- n, 
errno); -- else if(n < sizeof(req)){ -- printk("io_thread - short read : length = %d\n", n); -+ n = os_read_file(kernel_fd, &req, sizeof(req)); -+ if(n != sizeof(req)){ -+ if(n < 0) -+ printk("io_thread - read failed, fd = %d, " -+ "err = %d\n", kernel_fd, -n); -+ else { -+ printk("io_thread - short read, fd = %d, " -+ "length = %d\n", kernel_fd, n); -+ } - continue; - } - io_count++; - do_io(&req); -- n = write(kernel_fd, &req, sizeof(req)); -+ n = os_write_file(kernel_fd, &req, sizeof(req)); - if(n != sizeof(req)) -- printk("io_thread - write failed, errno = %d\n", -- errno); -+ printk("io_thread - write failed, fd = %d, err = %d\n", -+ kernel_fd, -n); - } - } - -@@ -571,10 +338,11 @@ - int pid, fds[2], err; - - err = os_pipe(fds, 1, 1); -- if(err){ -- printk("start_io_thread - os_pipe failed, errno = %d\n", -err); -- return(-1); -+ if(err < 0){ -+ printk("start_io_thread - os_pipe failed, err = %d\n", -err); -+ goto out; - } -+ - kernel_fd = fds[0]; - *fd_out = fds[1]; - -@@ -582,32 +350,19 @@ - NULL); - if(pid < 0){ - printk("start_io_thread - clone failed : errno = %d\n", errno); -- return(-errno); -+ goto out_close; - } -- return(pid); --} -- --#ifdef notdef --int start_io_thread(unsigned long sp, int *fd_out) --{ -- int pid; - -- if((kernel_fd = get_pty()) < 0) return(-1); -- raw(kernel_fd, 0); -- if((*fd_out = open(ptsname(kernel_fd), O_RDWR)) < 0){ -- printk("Couldn't open tty for IO\n"); -- return(-1); -- } -- -- pid = clone(io_thread, (void *) sp, CLONE_FILES | CLONE_VM | SIGCHLD, -- NULL); -- if(pid < 0){ -- printk("start_io_thread - clone failed : errno = %d\n", errno); -- return(-errno); -- } - return(pid); -+ -+ out_close: -+ os_close_file(fds[0]); -+ os_close_file(fds[1]); -+ kernel_fd = -1; -+ *fd_out = -1; -+ out: -+ return(err); - } --#endif - - /* - * Overrides for Emacs so that we follow Linus's tabbing style. 
-Index: uml-2.6.7/scripts/basic/fixdep.c -=================================================================== ---- uml-2.6.7.orig/scripts/basic/fixdep.c 2004-07-16 19:36:40.092051544 +0300 -+++ uml-2.6.7/scripts/basic/fixdep.c 2004-07-16 19:47:24.197132688 +0300 -@@ -93,6 +93,14 @@ - * (Note: it'd be easy to port over the complete mkdep state machine, - * but I don't think the added complexity is worth it) - */ -+/* -+ * Note 2: if somebody writes HELLO_CONFIG_BOOM in a file, it will depend onto -+ * CONFIG_BOOM. This could seem a bug (not too hard to fix), but please do not -+ * fix it! Some UserModeLinux files (look at arch/um/) call CONFIG_BOOM as -+ * UML_CONFIG_BOOM, to avoid conflicts with /usr/include/linux/autoconf.h, -+ * through arch/um/include/uml-config.h; this fixdep "bug" makes sure that -+ * those files will have correct dependencies. -+ */ - - #include - #include -@@ -310,6 +318,7 @@ - } - memcpy(s, m, p-m); s[p-m] = 0; - if (strrcmp(s, "include/linux/autoconf.h") && -+ strrcmp(s, "arch/um/include/uml-config.h") && - strrcmp(s, ".ver")) { - printf(" %s \\\n", s); - do_config_file(s); -Index: uml-2.6.7/include/asm-um/processor-i386.h -=================================================================== ---- uml-2.6.7.orig/include/asm-um/processor-i386.h 2004-07-16 19:35:55.931764928 +0300 -+++ uml-2.6.7/include/asm-um/processor-i386.h 2004-07-16 19:47:23.794193944 +0300 -@@ -6,8 +6,8 @@ - #ifndef __UM_PROCESSOR_I386_H - #define __UM_PROCESSOR_I386_H - --extern int cpu_has_xmm; --extern int cpu_has_cmov; -+extern int host_has_xmm; -+extern int host_has_cmov; - - struct arch_thread { - unsigned long debugregs[8]; -Index: uml-2.6.7/arch/um/kernel/time.c -=================================================================== ---- uml-2.6.7.orig/arch/um/kernel/time.c 2004-07-16 19:36:10.588536760 +0300 -+++ uml-2.6.7/arch/um/kernel/time.c 2004-07-16 19:47:24.262122808 +0300 -@@ -4,24 +4,34 @@ - */ - - #include -+#include - #include - #include - #include - 
#include - #include --#include "linux/module.h" -+#include - #include "user_util.h" - #include "kern_util.h" - #include "user.h" - #include "process.h" - #include "signal_user.h" - #include "time_user.h" -+#include "kern_constants.h" -+ -+/* XXX This really needs to be declared and initialized in a kernel file since -+ * it's in -+ */ -+extern struct timespec wall_to_monotonic; - - extern struct timeval xtime; - -+struct timeval local_offset = { 0, 0 }; -+ - void timer(void) - { - gettimeofday(&xtime, NULL); -+ timeradd(&xtime, &local_offset, &xtime); - } - - void set_interval(int timer_type) -@@ -66,7 +76,7 @@ - errno); - } - --void idle_timer(void) -+void uml_idle_timer(void) - { - if(signal(SIGVTALRM, SIG_IGN) == SIG_ERR) - panic("Couldn't unset SIGVTALRM handler"); -@@ -76,14 +86,60 @@ - set_interval(ITIMER_REAL); - } - -+static unsigned long long get_host_hz(void) -+{ -+ char mhzline[16], *end; -+ unsigned long long mhz; -+ int ret, mult, rest, len; -+ -+ ret = cpu_feature("cpu MHz", mhzline, -+ sizeof(mhzline) / sizeof(mhzline[0])); -+ if(!ret) -+ panic ("Could not get host MHZ"); -+ -+ mhz = strtoul(mhzline, &end, 10); -+ -+ /* This business is to parse a floating point number without using -+ * floating types. 
-+ */ -+ -+ rest = 0; -+ mult = 0; -+ if(*end == '.'){ -+ end++; -+ len = strlen(end); -+ if(len < 6) -+ mult = 6 - len; -+ else if(len > 6) -+ end[6] = '\0'; -+ rest = strtoul(end, NULL, 10); -+ while(mult-- > 0) -+ rest *= 10; -+ } -+ -+ return(1000000 * mhz + rest); -+} -+ -+unsigned long long host_hz = 0; -+ -+extern int do_posix_clock_monotonic_gettime(struct timespec *tp); -+ - void time_init(void) - { -+ struct timespec now; -+ -+ host_hz = get_host_hz(); - if(signal(SIGVTALRM, boot_timer_handler) == SIG_ERR) - panic("Couldn't set SIGVTALRM handler"); - set_interval(ITIMER_VIRTUAL); -+ -+ do_posix_clock_monotonic_gettime(&now); -+ wall_to_monotonic.tv_sec = -now.tv_sec; -+ wall_to_monotonic.tv_nsec = -now.tv_nsec; - } - --struct timeval local_offset = { 0, 0 }; -+/* Declared in linux/time.h, which can't be included here */ -+extern void clock_was_set(void); - - void do_gettimeofday(struct timeval *tv) - { -@@ -96,15 +152,13 @@ - clock_was_set(); - } - --EXPORT_SYMBOL(do_gettimeofday); -- - int do_settimeofday(struct timespec *tv) - { - struct timeval now; - unsigned long flags; - struct timeval tv_in; - -- if ((unsigned long)tv->tv_nsec >= NSEC_PER_SEC) -+ if ((unsigned long) tv->tv_nsec >= UM_NSEC_PER_SEC) - return -EINVAL; - - tv_in.tv_sec = tv->tv_sec; -@@ -114,9 +168,9 @@ - gettimeofday(&now, NULL); - timersub(&tv_in, &now, &local_offset); - time_unlock(flags); --} - --EXPORT_SYMBOL(do_settimeofday); -+ return(0); -+} - - void idle_sleep(int secs) - { -Index: uml-2.6.7/arch/um/drivers/mconsole_user.c -=================================================================== ---- uml-2.6.7.orig/arch/um/drivers/mconsole_user.c 2004-07-16 19:36:07.180054928 +0300 -+++ uml-2.6.7/arch/um/drivers/mconsole_user.c 2004-07-16 19:47:23.687210208 +0300 -@@ -1,6 +1,6 @@ - /* - * Copyright (C) 2001 Lennert Buytenhek (buytenh@gnu.org) -- * Copyright (C) 2001, 2002 Jeff Dike (jdike@karaya.com) -+ * Copyright (C) 2001 - 2003 Jeff Dike (jdike@addtoit.com) - * Licensed under 
the GPL - */ - -@@ -18,16 +18,18 @@ - #include "umid.h" - - static struct mconsole_command commands[] = { -- { "version", mconsole_version, 1 }, -- { "halt", mconsole_halt, 0 }, -- { "reboot", mconsole_reboot, 0 }, -- { "config", mconsole_config, 0 }, -- { "remove", mconsole_remove, 0 }, -- { "sysrq", mconsole_sysrq, 1 }, -- { "help", mconsole_help, 1 }, -- { "cad", mconsole_cad, 1 }, -- { "stop", mconsole_stop, 0 }, -- { "go", mconsole_go, 1 }, -+ { "version", mconsole_version, MCONSOLE_INTR }, -+ { "halt", mconsole_halt, MCONSOLE_PROC }, -+ { "reboot", mconsole_reboot, MCONSOLE_PROC }, -+ { "config", mconsole_config, MCONSOLE_PROC }, -+ { "remove", mconsole_remove, MCONSOLE_PROC }, -+ { "sysrq", mconsole_sysrq, MCONSOLE_INTR }, -+ { "help", mconsole_help, MCONSOLE_INTR }, -+ { "cad", mconsole_cad, MCONSOLE_INTR }, -+ { "stop", mconsole_stop, MCONSOLE_PROC }, -+ { "go", mconsole_go, MCONSOLE_INTR }, -+ { "log", mconsole_log, MCONSOLE_INTR }, -+ { "proc", mconsole_proc, MCONSOLE_PROC }, - }; - - /* Initialized in mconsole_init, which is an initcall */ -@@ -139,6 +141,7 @@ - memcpy(reply.data, str, len); - reply.data[len] = '\0'; - total -= len; -+ str += len; - reply.len = len + 1; - - len = sizeof(reply) + reply.len - sizeof(reply.data); -Index: uml-2.6.7/include/asm-um/module-i386.h -=================================================================== ---- uml-2.6.7.orig/include/asm-um/module-i386.h 2004-07-16 19:47:23.634218264 +0300 -+++ uml-2.6.7/include/asm-um/module-i386.h 2004-07-16 19:47:23.792194248 +0300 -@@ -0,0 +1,13 @@ -+#ifndef __UM_MODULE_I386_H -+#define __UM_MODULE_I386_H -+ -+/* UML is simple */ -+struct mod_arch_specific -+{ -+}; -+ -+#define Elf_Shdr Elf32_Shdr -+#define Elf_Sym Elf32_Sym -+#define Elf_Ehdr Elf32_Ehdr -+ -+#endif -Index: uml-2.6.7/arch/um/kernel/tt/ptproxy/wait.c -=================================================================== ---- uml-2.6.7.orig/arch/um/kernel/tt/ptproxy/wait.c 2004-07-16 19:37:26.029068056 +0300 -+++ 
uml-2.6.7/arch/um/kernel/tt/ptproxy/wait.c 2004-07-16 19:47:23.748200936 +0300 -@@ -56,21 +56,23 @@ - int real_wait_return(struct debugger *debugger) - { - unsigned long ip; -- int err, pid; -+ int pid; - - pid = debugger->pid; -+ - ip = ptrace(PTRACE_PEEKUSER, pid, PT_IP_OFFSET, 0); -- ip = IP_RESTART_SYSCALL(ip); -- err = ptrace(PTRACE_POKEUSER, pid, PT_IP_OFFSET, ip); -+ IP_RESTART_SYSCALL(ip); -+ - if(ptrace(PTRACE_POKEUSER, pid, PT_IP_OFFSET, ip) < 0) - tracer_panic("real_wait_return : Failed to restart system " -- "call, errno = %d\n"); -+ "call, errno = %d\n", errno); -+ - if((ptrace(PTRACE_SYSCALL, debugger->pid, 0, SIGCHLD) < 0) || - (ptrace(PTRACE_SYSCALL, debugger->pid, 0, 0) < 0) || - (ptrace(PTRACE_SYSCALL, debugger->pid, 0, 0) < 0) || - debugger_normal_return(debugger, -1)) - tracer_panic("real_wait_return : gdb failed to wait, " -- "errno = %d\n"); -+ "errno = %d\n", errno); - return(0); - } - -Index: uml-2.6.7/include/asm-um/common.lds.S -=================================================================== ---- uml-2.6.7.orig/include/asm-um/common.lds.S 2004-07-16 19:36:10.607533872 +0300 -+++ uml-2.6.7/include/asm-um/common.lds.S 2004-07-16 19:47:23.787195008 +0300 -@@ -1,3 +1,5 @@ -+#include -+ - .fini : { *(.fini) } =0x9090 - _etext = .; - PROVIDE (etext = .); -@@ -13,18 +15,6 @@ - - RODATA - -- __start___ksymtab = .; /* Kernel symbol table */ -- __ksymtab : { *(__ksymtab) } -- __stop___ksymtab = .; -- -- __start___gpl_ksymtab = .; /* Kernel symbol table: GPL-only symbols */ -- __gpl_ksymtab : { *(__gpl_ksymtab) } -- __stop___gpl_ksymtab = .; -- -- __start___kallsyms = .; /* All kernel symbols */ -- __kallsyms : { *(__kallsyms) } -- __stop___kallsyms = .; -- - .unprotected : { *(.unprotected) } - . 
= ALIGN(4096); - PROVIDE (_unprotected_end = .); -@@ -67,11 +57,17 @@ - } - __initcall_end = .; - -+ __con_initcall_start = .; -+ .con_initcall.init : { *(.con_initcall.init) } -+ __con_initcall_end = .; -+ - __uml_initcall_start = .; - .uml.initcall.init : { *(.uml.initcall.init) } - __uml_initcall_end = .; - __init_end = .; - -+ SECURITY_INIT -+ - __exitcall_begin = .; - .exitcall : { *(.exitcall.exit) } - __exitcall_end = .; -@@ -80,7 +76,33 @@ - .uml.exitcall : { *(.uml.exitcall.exit) } - __uml_exitcall_end = .; - -- . = ALIGN(4096); -+ . = ALIGN(4); -+ __alt_instructions = .; -+ .altinstructions : { *(.altinstructions) } -+ __alt_instructions_end = .; -+ .altinstr_replacement : { *(.altinstr_replacement) } -+ /* .exit.text is discard at runtime, not link time, to deal with references -+ from .altinstructions and .eh_frame */ -+ .exit.text : { *(.exit.text) } -+ .exit.data : { *(.exit.data) } -+ -+ __preinit_array_start = .; -+ .preinit_array : { *(.preinit_array) } -+ __preinit_array_end = .; -+ __init_array_start = .; -+ .init_array : { *(.init_array) } -+ __init_array_end = .; -+ __fini_array_start = .; -+ .fini_array : { *(.fini_array) } -+ __fini_array_end = .; -+ -+ . 
= ALIGN(4096); - __initramfs_start = .; - .init.ramfs : { *(.init.ramfs) } - __initramfs_end = .; -+ -+ /* Sections to be discarded */ -+ /DISCARD/ : { -+ *(.exitcall.exit) -+ } -+ -Index: uml-2.6.7/arch/um/drivers/hostaudio_user.c -=================================================================== ---- uml-2.6.7.orig/arch/um/drivers/hostaudio_user.c 2004-07-16 19:37:08.767692184 +0300 -+++ uml-2.6.7/arch/um/drivers/hostaudio_user.c 1970-01-01 03:00:00.000000000 +0300 -@@ -1,149 +0,0 @@ --/* -- * Copyright (C) 2002 Steve Schmidtke -- * Licensed under the GPL -- */ -- --#include --#include --#include --#include --#include --#include --#include "hostaudio.h" --#include "user_util.h" --#include "kern_util.h" --#include "user.h" --#include "os.h" -- --/* /dev/dsp file operations */ -- --ssize_t hostaudio_read_user(struct hostaudio_state *state, char *buffer, -- size_t count, loff_t *ppos) --{ -- ssize_t ret; -- --#ifdef DEBUG -- printk("hostaudio: read_user called, count = %d\n", count); --#endif -- -- ret = read(state->fd, buffer, count); -- -- if(ret < 0) return(-errno); -- return(ret); --} -- --ssize_t hostaudio_write_user(struct hostaudio_state *state, const char *buffer, -- size_t count, loff_t *ppos) --{ -- ssize_t ret; -- --#ifdef DEBUG -- printk("hostaudio: write_user called, count = %d\n", count); --#endif -- -- ret = write(state->fd, buffer, count); -- -- if(ret < 0) return(-errno); -- return(ret); --} -- --int hostaudio_ioctl_user(struct hostaudio_state *state, unsigned int cmd, -- unsigned long arg) --{ -- int ret; --#ifdef DEBUG -- printk("hostaudio: ioctl_user called, cmd = %u\n", cmd); --#endif -- -- ret = ioctl(state->fd, cmd, arg); -- -- if(ret < 0) return(-errno); -- return(ret); --} -- --int hostaudio_open_user(struct hostaudio_state *state, int r, int w, char *dsp) --{ --#ifdef DEBUG -- printk("hostaudio: open_user called\n"); --#endif -- -- state->fd = os_open_file(dsp, of_set_rw(OPENFLAGS(), r, w), 0); -- -- if(state->fd >= 0) return(0); -- -- 
printk("hostaudio_open_user failed to open '%s', errno = %d\n", -- dsp, errno); -- -- return(-errno); --} -- --int hostaudio_release_user(struct hostaudio_state *state) --{ --#ifdef DEBUG -- printk("hostaudio: release called\n"); --#endif -- if(state->fd >= 0){ -- close(state->fd); -- state->fd=-1; -- } -- -- return(0); --} -- --/* /dev/mixer file operations */ -- --int hostmixer_ioctl_mixdev_user(struct hostmixer_state *state, -- unsigned int cmd, unsigned long arg) --{ -- int ret; --#ifdef DEBUG -- printk("hostmixer: ioctl_user called cmd = %u\n",cmd); --#endif -- -- ret = ioctl(state->fd, cmd, arg); -- if(ret < 0) -- return(-errno); -- return(ret); --} -- --int hostmixer_open_mixdev_user(struct hostmixer_state *state, int r, int w, -- char *mixer) --{ --#ifdef DEBUG -- printk("hostmixer: open_user called\n"); --#endif -- -- state->fd = os_open_file(mixer, of_set_rw(OPENFLAGS(), r, w), 0); -- -- if(state->fd >= 0) return(0); -- -- printk("hostaudio_open_mixdev_user failed to open '%s', errno = %d\n", -- mixer, errno); -- -- return(-errno); --} -- --int hostmixer_release_mixdev_user(struct hostmixer_state *state) --{ --#ifdef DEBUG -- printk("hostmixer: release_user called\n"); --#endif -- -- if(state->fd >= 0){ -- close(state->fd); -- state->fd = -1; -- } -- -- return 0; --} -- --/* -- * Overrides for Emacs so that we follow Linus's tabbing style. -- * Emacs will notice this stuff at the end of the file and automatically -- * adjust the settings for this buffer only. This must remain at the end -- * of the file. 
-- * --------------------------------------------------------------------------- -- * Local variables: -- * c-file-style: "linux" -- * End: -- */ -Index: uml-2.6.7/arch/um/kernel/time_kern.c -=================================================================== ---- uml-2.6.7.orig/arch/um/kernel/time_kern.c 2004-07-16 19:36:57.116463440 +0300 -+++ uml-2.6.7/arch/um/kernel/time_kern.c 2004-07-16 19:47:24.262122808 +0300 -@@ -30,22 +30,60 @@ - return(HZ); - } - -+/* -+ * Scheduler clock - returns current time in nanosec units. -+ */ -+unsigned long long sched_clock(void) -+{ -+ return (unsigned long long)jiffies_64 * (1000000000 / HZ); -+} -+ - /* Changed at early boot */ - int timer_irq_inited = 0; - --/* missed_ticks will be modified after kernel memory has been -- * write-protected, so this puts it in a section which will be left -- * write-enabled. -- */ --int __attribute__ ((__section__ (".unprotected"))) missed_ticks[NR_CPUS]; -+static int first_tick; -+static unsigned long long prev_tsc; -+#ifdef CONFIG_UML_REAL_TIME_CLOCK -+static long long delta; /* Deviation per interval */ -+#endif -+ -+extern unsigned long long host_hz; - - void timer_irq(union uml_pt_regs *regs) - { -- int cpu = current->thread_info->cpu, ticks = missed_ticks[cpu]; -+ unsigned long long ticks = 0; -+ -+ if(!timer_irq_inited){ -+ /* This is to ensure that ticks don't pile up when -+ * the timer handler is suspended */ -+ first_tick = 0; -+ return; -+ } - -- if(!timer_irq_inited) return; -- missed_ticks[cpu] = 0; -- while(ticks--) do_IRQ(TIMER_IRQ, regs); -+ if(first_tick){ -+#ifdef CONFIG_UML_REAL_TIME_CLOCK -+ unsigned long long tsc; -+ /* We've had 1 tick */ -+ tsc = time_stamp(); -+ -+ delta += tsc - prev_tsc; -+ prev_tsc = tsc; -+ -+ ticks += (delta * HZ) / host_hz; -+ delta -= (ticks * host_hz) / HZ; -+#else -+ ticks = 1; -+#endif -+ } -+ else { -+ prev_tsc = time_stamp(); -+ first_tick = 1; -+ } -+ -+ while(ticks > 0){ -+ do_IRQ(TIMER_IRQ, regs); -+ ticks--; -+ } - } - - void 
boot_timer_handler(int sig) -@@ -58,12 +96,15 @@ - do_timer(®s); - } - --void um_timer(int irq, void *dev, struct pt_regs *regs) -+irqreturn_t um_timer(int irq, void *dev, struct pt_regs *regs) - { -+ unsigned long flags; -+ - do_timer(regs); -- write_seqlock(&xtime_lock); -+ write_seqlock_irqsave(&xtime_lock, flags); - timer(); -- write_sequnlock(&xtime_lock); -+ write_sequnlock_irqrestore(&xtime_lock, flags); -+ return(IRQ_HANDLED); - } - - long um_time(int * tloc) -@@ -81,12 +122,12 @@ - long um_stime(int * tptr) - { - int value; -- struct timeval new; -+ struct timespec new; - - if (get_user(value, tptr)) - return -EFAULT; - new.tv_sec = value; -- new.tv_usec = 0; -+ new.tv_nsec = 0; - do_settimeofday(&new); - return 0; - } -@@ -125,9 +166,11 @@ - void timer_handler(int sig, union uml_pt_regs *regs) - { - #ifdef CONFIG_SMP -+ local_irq_disable(); - update_process_times(user_context(UPT_SP(regs))); -+ local_irq_enable(); - #endif -- if(current->thread_info->cpu == 0) -+ if(current_thread->cpu == 0) - timer_irq(regs); - } - -@@ -136,6 +179,7 @@ - unsigned long time_lock(void) - { - unsigned long flags; -+ - spin_lock_irqsave(&timer_spinlock, flags); - return(flags); - } -@@ -150,8 +194,8 @@ - int err; - - CHOOSE_MODE(user_time_init_tt(), user_time_init_skas()); -- if((err = request_irq(TIMER_IRQ, um_timer, SA_INTERRUPT, "timer", -- NULL)) != 0) -+ err = request_irq(TIMER_IRQ, um_timer, SA_INTERRUPT, "timer", NULL); -+ if(err != 0) - printk(KERN_ERR "timer_init : request_irq failed - " - "errno = %d\n", -err); - timer_irq_inited = 1; -@@ -160,7 +204,6 @@ - - __initcall(timer_init); - -- - /* - * Overrides for Emacs so that we follow Linus's tabbing style. 
- * Emacs will notice this stuff at the end of the file and automatically diff --git a/lustre/kernel_patches/patches/vfs_intent-2.6-vanilla.patch b/lustre/kernel_patches/patches/vfs_intent-2.6-vanilla.patch deleted file mode 100644 index f761326..0000000 --- a/lustre/kernel_patches/patches/vfs_intent-2.6-vanilla.patch +++ /dev/null @@ -1,799 +0,0 @@ -Index: linux-2.6.6-vanilla/fs/exec.c -=================================================================== ---- linux-2.6.6-vanilla.orig/fs/exec.c 2004-05-26 20:25:42.000000000 +0300 -+++ linux-2.6.6-vanilla/fs/exec.c 2004-06-29 13:56:19.131583960 +0300 -@@ -121,9 +121,10 @@ - struct file * file; - struct nameidata nd; - int error; -+ intent_init(&nd.intent, IT_OPEN); - -- nd.intent.open.flags = FMODE_READ; -- error = __user_walk(library, LOOKUP_FOLLOW|LOOKUP_OPEN, &nd); -+ nd.intent.it_flags = FMODE_READ|FMODE_EXEC; -+ error = __user_walk_it(library, LOOKUP_FOLLOW|LOOKUP_OPEN, &nd); - if (error) - goto out; - -@@ -135,7 +136,7 @@ - if (error) - goto exit; - -- file = dentry_open(nd.dentry, nd.mnt, O_RDONLY); -+ file = dentry_open_it(nd.dentry, nd.mnt, O_RDONLY, &nd.intent); - error = PTR_ERR(file); - if (IS_ERR(file)) - goto out; -@@ -483,8 +484,9 @@ - int err; - struct file *file; - -- nd.intent.open.flags = FMODE_READ; -- err = path_lookup(name, LOOKUP_FOLLOW|LOOKUP_OPEN, &nd); -+ intent_init(&nd.intent, IT_OPEN); -+ nd.intent.it_flags = FMODE_READ|FMODE_EXEC; -+ err = path_lookup(name, LOOKUP_FOLLOW, &nd); - file = ERR_PTR(err); - - if (!err) { -@@ -497,7 +499,7 @@ - err = -EACCES; - file = ERR_PTR(err); - if (!err) { -- file = dentry_open(nd.dentry, nd.mnt, O_RDONLY); -+ file = dentry_open_it(nd.dentry, nd.mnt, O_RDONLY, &nd.intent); - if (!IS_ERR(file)) { - err = deny_write_access(file); - if (err) { -Index: linux-2.6.6-vanilla/fs/namei.c -=================================================================== ---- linux-2.6.6-vanilla.orig/fs/namei.c 2004-05-26 20:25:43.000000000 +0300 -+++ 
linux-2.6.6-vanilla/fs/namei.c 2004-06-29 12:25:05.243741400 +0300 -@@ -272,8 +272,19 @@ - return 0; - } - -+void intent_release(struct lookup_intent *it) -+{ -+ if (!it) -+ return; -+ if (it->it_magic != INTENT_MAGIC) -+ return; -+ if (it->it_op_release) -+ it->it_op_release(it); -+} -+ - void path_release(struct nameidata *nd) - { -+ intent_release(&nd->intent); - dput(nd->dentry); - mntput(nd->mnt); - } -@@ -350,7 +361,10 @@ - { - struct dentry * result; - struct inode *dir = parent->d_inode; -+ int counter = 0; - -+again: -+ counter++; - down(&dir->i_sem); - /* - * First re-do the cached lookup just in case it was created -@@ -389,7 +403,10 @@ - if (result->d_op && result->d_op->d_revalidate) { - if (!result->d_op->d_revalidate(result, nd) && !d_invalidate(result)) { - dput(result); -- result = ERR_PTR(-ENOENT); -+ if (counter > 10) -+ result = ERR_PTR(-ESTALE); -+ if (!IS_ERR(result)) -+ goto again; - } - } - return result; -@@ -566,6 +583,33 @@ - return PTR_ERR(dentry); - } - -+static int revalidate_special(struct nameidata *nd) -+{ -+ struct dentry *dentry = nd->dentry; -+ int err, counter = 0; -+ -+ revalidate_again: -+ if (!dentry->d_op || !dentry->d_op->d_revalidate) -+ return 0; -+ if (!dentry->d_op->d_revalidate(dentry, nd)) { -+ struct dentry *new; -+ if ((err = permission(dentry->d_parent->d_inode, MAY_EXEC, nd))) -+ return err; -+ new = real_lookup(dentry->d_parent, &dentry->d_name, nd); -+ if (IS_ERR(new)) -+ return PTR_ERR(new); -+ d_invalidate(dentry); -+ dput(dentry); -+ nd->dentry = dentry = new; -+ counter++; -+ if (counter < 10) -+ goto revalidate_again; -+ printk("excessive revalidate_it loops\n"); -+ return -ESTALE; -+ } -+ return 0; -+} -+ - /* - * Name resolution. 
- * -@@ -666,7 +710,9 @@ - - if (inode->i_op->follow_link) { - mntget(next.mnt); -+ nd->flags |= LOOKUP_LINK_NOTLAST; - err = do_follow_link(next.dentry, nd); -+ nd->flags &= ~LOOKUP_LINK_NOTLAST; - dput(next.dentry); - mntput(next.mnt); - if (err) -@@ -705,14 +751,34 @@ - inode = nd->dentry->d_inode; - /* fallthrough */ - case 1: -+ nd->flags |= LOOKUP_LAST; -+ err = revalidate_special(nd); -+ nd->flags &= ~LOOKUP_LAST; -+ if (!nd->dentry->d_inode) -+ err = -ENOENT; -+ if (err) { -+ path_release(nd); -+ goto return_err; -+ } -+ if (lookup_flags & LOOKUP_DIRECTORY) { -+ err = -ENOTDIR; -+ if(!nd->dentry->d_inode->i_op || -+ !nd->dentry->d_inode->i_op->lookup) { -+ path_release(nd); -+ goto return_err; -+ } -+ } - goto return_reval; - } -+ - if (nd->dentry->d_op && nd->dentry->d_op->d_hash) { - err = nd->dentry->d_op->d_hash(nd->dentry, &this); - if (err < 0) - break; - } -+ nd->flags |= LOOKUP_LAST; - err = do_lookup(nd, &this, &next); -+ nd->flags &= ~LOOKUP_LAST; - if (err) - break; - follow_mount(&next.mnt, &next.dentry); -@@ -946,7 +1004,7 @@ - } - - /* SMP-safe */ --struct dentry * lookup_one_len(const char * name, struct dentry * base, int len) -+struct dentry * lookup_one_len_it(const char * name, struct dentry * base, int len, struct nameidata *nd) - { - unsigned long hash; - struct qstr this; -@@ -966,11 +1024,16 @@ - } - this.hash = end_name_hash(hash); - -- return lookup_hash(&this, base); -+ return __lookup_hash(&this, base, nd); - access: - return ERR_PTR(-EACCES); - } - -+struct dentry * lookup_one_len(const char * name, struct dentry * base, int len) -+{ -+ return lookup_one_len_it(name, base, len, NULL); -+} -+ - /* - * namei() - * -@@ -982,7 +1045,7 @@ - * that namei follows links, while lnamei does not. 
- * SMP-safe - */ --int fastcall __user_walk(const char __user *name, unsigned flags, struct nameidata *nd) -+int fastcall __user_walk_it(const char __user *name, unsigned flags, struct nameidata *nd) - { - char *tmp = getname(name); - int err = PTR_ERR(tmp); -@@ -994,6 +1057,12 @@ - return err; - } - -+int __user_walk(const char __user *name, unsigned flags, struct nameidata *nd) -+{ -+ intent_init(&nd->intent, IT_LOOKUP); -+ return __user_walk_it(name, flags, nd); -+} -+ - /* - * It's inline, so penalty for filesystems that don't use sticky bit is - * minimal. -@@ -1266,8 +1335,8 @@ - acc_mode |= MAY_APPEND; - - /* Fill in the open() intent data */ -- nd->intent.open.flags = flag; -- nd->intent.open.create_mode = mode; -+ nd->intent.it_flags = flag; -+ nd->intent.it_create_mode = mode; - - /* - * The simplest case - just a plain lookup. -@@ -1282,6 +1351,7 @@ - /* - * Create - we need to know the parent. - */ -+ nd->intent.it_op |= IT_CREAT; - error = path_lookup(pathname, LOOKUP_PARENT|LOOKUP_OPEN|LOOKUP_CREATE, nd); - if (error) - return error; -@@ -1298,7 +1368,9 @@ - dir = nd->dentry; - nd->flags &= ~LOOKUP_PARENT; - down(&dir->d_inode->i_sem); -+ nd->flags |= LOOKUP_LAST; - dentry = __lookup_hash(&nd->last, nd->dentry, nd); -+ nd->flags &= ~LOOKUP_LAST; - - do_last: - error = PTR_ERR(dentry); -@@ -1403,7 +1475,9 @@ - } - dir = nd->dentry; - down(&dir->d_inode->i_sem); -+ nd->flags |= LOOKUP_LAST; - dentry = __lookup_hash(&nd->last, nd->dentry, nd); -+ nd->flags &= ~LOOKUP_LAST; - putname(nd->last.name); - goto do_last; - } -@@ -2165,7 +2239,9 @@ - __vfs_follow_link(struct nameidata *nd, const char *link) - { - int res = 0; -+ struct lookup_intent it = nd->intent; - char *name; -+ - if (IS_ERR(link)) - goto fail; - -@@ -2175,6 +2251,10 @@ - /* weird __emul_prefix() stuff did it */ - goto out; - } -+ -+ intent_init(&nd->intent, it.it_op); -+ nd->intent.it_flags = it.it_flags; -+ nd->intent.it_create_mode = it.it_create_mode; - res = link_path_walk(link, nd); - 
out: - if (current->link_count || res || nd->last_type!=LAST_NORM) -Index: linux-2.6.6-vanilla/fs/namespace.c -=================================================================== ---- linux-2.6.6-vanilla.orig/fs/namespace.c 2004-05-26 20:25:43.000000000 +0300 -+++ linux-2.6.6-vanilla/fs/namespace.c 2004-06-29 13:50:23.793603544 +0300 -@@ -115,6 +115,7 @@ - - static void detach_mnt(struct vfsmount *mnt, struct nameidata *old_nd) - { -+ memset(old_nd, 0, sizeof(*old_nd)); - old_nd->dentry = mnt->mnt_mountpoint; - old_nd->mnt = mnt->mnt_parent; - mnt->mnt_parent = mnt; -@@ -539,6 +540,7 @@ - return err; - if (!old_name || !*old_name) - return -EINVAL; -+ intent_init(&old_nd.intent, IT_LOOKUP); - err = path_lookup(old_name, LOOKUP_FOLLOW, &old_nd); - if (err) - return err; -@@ -607,6 +609,7 @@ - return -EPERM; - if (!old_name || !*old_name) - return -EINVAL; -+ intent_init(&old_nd.intent, IT_LOOKUP); - err = path_lookup(old_name, LOOKUP_FOLLOW, &old_nd); - if (err) - return err; -@@ -756,6 +759,7 @@ - int retval = 0; - int mnt_flags = 0; - -+ intent_init(&nd.intent, IT_LOOKUP); - /* Discard magic */ - if ((flags & MS_MGC_MSK) == MS_MGC_VAL) - flags &= ~MS_MGC_MSK; -Index: linux-2.6.6-vanilla/fs/open.c -=================================================================== ---- linux-2.6.6-vanilla.orig/fs/open.c 2004-05-26 20:25:43.000000000 +0300 -+++ linux-2.6.6-vanilla/fs/open.c 2004-06-29 13:52:28.003720736 +0300 -@@ -213,12 +213,12 @@ - struct nameidata nd; - struct inode * inode; - int error; -- -+ intent_init(&nd.intent, IT_GETATTR); - error = -EINVAL; - if (length < 0) /* sorry, but loff_t says... */ - goto out; - -- error = user_path_walk(path, &nd); -+ error = user_path_walk_it(path, &nd); - if (error) - goto out; - inode = nd.dentry->d_inode; -@@ -472,6 +472,7 @@ - int old_fsuid, old_fsgid; - kernel_cap_t old_cap; - int res; -+ intent_init(&nd.intent, IT_GETATTR); - - if (mode & ~S_IRWXO) /* where's F_OK, X_OK, W_OK, R_OK? 
*/ - return -EINVAL; -@@ -496,13 +497,14 @@ - else - current->cap_effective = current->cap_permitted; - -- res = __user_walk(filename, LOOKUP_FOLLOW|LOOKUP_ACCESS, &nd); -+ res = __user_walk_it(filename, LOOKUP_FOLLOW|LOOKUP_ACCESS, &nd); - if (!res) { - res = permission(nd.dentry->d_inode, mode, &nd); - /* SuS v2 requires we report a read only fs too */ - if(!res && (mode & S_IWOTH) && IS_RDONLY(nd.dentry->d_inode) - && !special_file(nd.dentry->d_inode->i_mode)) - res = -EROFS; -+ - path_release(&nd); - } - -@@ -517,8 +519,9 @@ - { - struct nameidata nd; - int error; -+ intent_init(&nd.intent, IT_GETATTR); - -- error = __user_walk(filename, LOOKUP_FOLLOW|LOOKUP_DIRECTORY, &nd); -+ error = __user_walk_it(filename, LOOKUP_FOLLOW|LOOKUP_DIRECTORY, &nd); - if (error) - goto out; - -@@ -568,8 +571,9 @@ - { - struct nameidata nd; - int error; -+ intent_init(&nd.intent, IT_GETATTR); - -- error = __user_walk(filename, LOOKUP_FOLLOW | LOOKUP_DIRECTORY | LOOKUP_NOALT, &nd); -+ error = __user_walk_it(filename, LOOKUP_FOLLOW | LOOKUP_DIRECTORY | LOOKUP_NOALT, &nd); - if (error) - goto out; - -@@ -748,27 +752,8 @@ - * for the internal routines (ie open_namei()/follow_link() etc). 00 is - * used by symlinks. 
- */ --struct file *filp_open(const char * filename, int flags, int mode) --{ -- int namei_flags, error; -- struct nameidata nd; -- -- namei_flags = flags; -- if ((namei_flags+1) & O_ACCMODE) -- namei_flags++; -- if (namei_flags & O_TRUNC) -- namei_flags |= 2; -- -- error = open_namei(filename, namei_flags, mode, &nd); -- if (!error) -- return dentry_open(nd.dentry, nd.mnt, flags); -- -- return ERR_PTR(error); --} -- --EXPORT_SYMBOL(filp_open); -- --struct file *dentry_open(struct dentry *dentry, struct vfsmount *mnt, int flags) -+struct file *dentry_open_it(struct dentry *dentry, struct vfsmount *mnt, int flags, -+ struct lookup_intent *it) - { - struct file * f; - struct inode *inode; -@@ -780,6 +765,7 @@ - goto cleanup_dentry; - f->f_flags = flags; - f->f_mode = (flags+1) & O_ACCMODE; -+ f->f_it = it; - inode = dentry->d_inode; - if (f->f_mode & FMODE_WRITE) { - error = get_write_access(inode); -@@ -799,6 +785,7 @@ - error = f->f_op->open(inode,f); - if (error) - goto cleanup_all; -+ intent_release(it); - } - f->f_flags &= ~(O_CREAT | O_EXCL | O_NOCTTY | O_TRUNC); - -@@ -823,6 +810,7 @@ - cleanup_file: - put_filp(f); - cleanup_dentry: -+ intent_release(it); - dput(dentry); - mntput(mnt); - return ERR_PTR(error); -@@ -830,6 +818,36 @@ - - EXPORT_SYMBOL(dentry_open); - -+struct file *filp_open(const char * filename, int flags, int mode) -+{ -+ int namei_flags, error; -+ struct file * temp_filp; -+ struct nameidata nd; -+ intent_init(&nd.intent, IT_OPEN); -+ -+ namei_flags = flags; -+ if ((namei_flags+1) & O_ACCMODE) -+ namei_flags++; -+ if (namei_flags & O_TRUNC) -+ namei_flags |= 2; -+ -+ error = open_namei(filename, namei_flags, mode, &nd); -+ if (!error) { -+ temp_filp = dentry_open_it(nd.dentry, nd.mnt, flags, &nd.intent); -+ return temp_filp; -+ } -+ return ERR_PTR(error); -+} -+ -+ -+struct file *dentry_open(struct dentry *dentry, struct vfsmount *mnt, int flags) -+{ -+ struct lookup_intent it; -+ intent_init(&it, IT_LOOKUP); -+ -+ return 
dentry_open_it(dentry, mnt, flags, &it); -+} -+ - /* - * Find an empty file descriptor entry, and mark it busy. - */ -Index: linux-2.6.6-vanilla/fs/stat.c -=================================================================== ---- linux-2.6.6-vanilla.orig/fs/stat.c 2004-05-26 20:25:43.000000000 +0300 -+++ linux-2.6.6-vanilla/fs/stat.c 2004-06-29 12:23:41.822423368 +0300 -@@ -36,7 +36,7 @@ - - EXPORT_SYMBOL(generic_fillattr); - --int vfs_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat) -+int vfs_getattr_it(struct vfsmount *mnt, struct dentry *dentry, struct lookup_intent *it, struct kstat *stat) - { - struct inode *inode = dentry->d_inode; - int retval; -@@ -45,6 +45,8 @@ - if (retval) - return retval; - -+ if (inode->i_op->getattr_it) -+ return inode->i_op->getattr_it(mnt, dentry, it, stat); - if (inode->i_op->getattr) - return inode->i_op->getattr(mnt, dentry, stat); - -@@ -61,14 +63,20 @@ - - EXPORT_SYMBOL(vfs_getattr); - -+int vfs_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat) -+{ -+ return vfs_getattr_it(mnt, dentry, NULL, stat); -+} -+ - int vfs_stat(char __user *name, struct kstat *stat) - { - struct nameidata nd; - int error; -+ intent_init(&nd.intent, IT_GETATTR); - -- error = user_path_walk(name, &nd); -+ error = user_path_walk_it(name, &nd); - if (!error) { -- error = vfs_getattr(nd.mnt, nd.dentry, stat); -+ error = vfs_getattr_it(nd.mnt, nd.dentry, &nd.intent, stat); - path_release(&nd); - } - return error; -@@ -80,10 +88,11 @@ - { - struct nameidata nd; - int error; -+ intent_init(&nd.intent, IT_GETATTR); - -- error = user_path_walk_link(name, &nd); -+ error = user_path_walk_link_it(name, &nd); - if (!error) { -- error = vfs_getattr(nd.mnt, nd.dentry, stat); -+ error = vfs_getattr_it(nd.mnt, nd.dentry, &nd.intent, stat); - path_release(&nd); - } - return error; -@@ -95,9 +104,12 @@ - { - struct file *f = fget(fd); - int error = -EBADF; -+ struct nameidata nd; -+ intent_init(&nd.intent, IT_GETATTR); - - if 
(f) { -- error = vfs_getattr(f->f_vfsmnt, f->f_dentry, stat); -+ error = vfs_getattr_it(f->f_vfsmnt, f->f_dentry, &nd.intent, stat); -+ intent_release(&nd.intent); - fput(f); - } - return error; -Index: linux-2.6.6-vanilla/fs/nfs/dir.c -=================================================================== ---- linux-2.6.6-vanilla.orig/fs/nfs/dir.c 2004-05-26 20:25:50.000000000 +0300 -+++ linux-2.6.6-vanilla/fs/nfs/dir.c 2004-06-29 13:14:46.887462656 +0300 -@@ -705,7 +705,7 @@ - return 0; - if (!nd || (nd->flags & LOOKUP_CONTINUE) || !(nd->flags & LOOKUP_CREATE)) - return 0; -- return (nd->intent.open.flags & O_EXCL) != 0; -+ return (nd->intent.it_flags & O_EXCL) != 0; - } - - static struct dentry *nfs_lookup(struct inode *dir, struct dentry * dentry, struct nameidata *nd) -@@ -1022,7 +1022,7 @@ - attr.ia_valid = ATTR_MODE; - - if (nd && (nd->flags & LOOKUP_CREATE)) -- open_flags = nd->intent.open.flags; -+ open_flags = nd->intent.it_flags; - - /* - * The 0 argument passed into the create function should one day -Index: linux-2.6.6-vanilla/fs/inode.c -=================================================================== ---- linux-2.6.6-vanilla.orig/fs/inode.c 2004-05-26 20:25:43.000000000 +0300 -+++ linux-2.6.6-vanilla/fs/inode.c 2004-06-29 12:23:41.844420024 +0300 -@@ -220,6 +220,7 @@ - inodes_stat.nr_unused--; - } - -+EXPORT_SYMBOL(__iget); - /** - * clear_inode - clear an inode - * @inode: inode to clear -Index: linux-2.6.6-vanilla/fs/super.c -=================================================================== ---- linux-2.6.6-vanilla.orig/fs/super.c 2004-05-26 20:25:43.000000000 +0300 -+++ linux-2.6.6-vanilla/fs/super.c 2004-06-29 12:23:41.846419720 +0300 -@@ -788,6 +788,8 @@ - return (struct vfsmount *)sb; - } - -+EXPORT_SYMBOL(do_kern_mount); -+ - struct vfsmount *kern_mount(struct file_system_type *type) - { - return do_kern_mount(type->name, 0, type->name, NULL); -Index: linux-2.6.6-vanilla/include/linux/dcache.h 
-=================================================================== ---- linux-2.6.6-vanilla.orig/include/linux/dcache.h 2004-05-26 20:26:11.000000000 +0300 -+++ linux-2.6.6-vanilla/include/linux/dcache.h 2004-06-29 12:23:41.847419568 +0300 -@@ -4,6 +4,7 @@ - #ifdef __KERNEL__ - - #include -+#include - #include - #include - #include -@@ -35,6 +36,8 @@ - char name_str[0]; - }; - -+#include -+ - struct dentry_stat_t { - int nr_dentry; - int nr_unused; -Index: linux-2.6.6-vanilla/include/linux/fs.h -=================================================================== ---- linux-2.6.6-vanilla.orig/include/linux/fs.h 2004-05-26 20:26:11.000000000 +0300 -+++ linux-2.6.6-vanilla/include/linux/fs.h 2004-06-29 12:23:41.850419112 +0300 -@@ -76,6 +76,7 @@ - - #define FMODE_READ 1 - #define FMODE_WRITE 2 -+#define FMODE_EXEC 4 - - #define RW_MASK 1 - #define RWA_MASK 2 -@@ -249,6 +249,8 @@ - #define ATTR_ATTR_FLAG 1024 - #define ATTR_KILL_SUID 2048 - #define ATTR_KILL_SGID 4096 -+#define ATTR_RAW 8192 /* file system, not vfs will massage attrs */ -+#define ATTR_FROM_OPEN 16384 /* called from open path, ie O_TRUNC */ - - /* - * This is the Inode Attributes structure, used for notify_change(). 
It -@@ -443,6 +445,7 @@ - struct block_device *i_bdev; - struct cdev *i_cdev; - int i_cindex; -+ void *i_filterdata; - - unsigned long i_dnotify_mask; /* Directory notify events */ - struct dnotify_struct *i_dnotify; /* for directory notifications */ -@@ -576,6 +579,7 @@ - spinlock_t f_ep_lock; - #endif /* #ifdef CONFIG_EPOLL */ - struct address_space *f_mapping; -+ struct lookup_intent *f_it; - }; - extern spinlock_t files_lock; - #define file_list_lock() spin_lock(&files_lock); -@@ -900,7 +904,9 @@ - void (*truncate) (struct inode *); - int (*permission) (struct inode *, int, struct nameidata *); - int (*setattr) (struct dentry *, struct iattr *); -+ int (*setattr_raw) (struct inode *, struct iattr *); - int (*getattr) (struct vfsmount *mnt, struct dentry *, struct kstat *); -+ int (*getattr_it) (struct vfsmount *, struct dentry *, struct lookup_intent *, struct kstat *); - int (*setxattr) (struct dentry *, const char *,const void *,size_t,int); - ssize_t (*getxattr) (struct dentry *, const char *, void *, size_t); - ssize_t (*listxattr) (struct dentry *, char *, size_t); -@@ -1126,6 +1132,7 @@ - extern int unregister_filesystem(struct file_system_type *); - extern struct vfsmount *kern_mount(struct file_system_type *); - extern int may_umount(struct vfsmount *); -+struct vfsmount *do_kern_mount(const char *type, int flags, const char *name, void *data); - extern long do_mount(char *, char *, char *, unsigned long, void *); - - extern int vfs_statfs(struct super_block *, struct kstatfs *); -@@ -1190,6 +1197,7 @@ - extern int do_truncate(struct dentry *, loff_t start); - extern struct file *filp_open(const char *, int, int); - extern struct file * dentry_open(struct dentry *, struct vfsmount *, int); -+extern struct file * dentry_open_it(struct dentry *, struct vfsmount *, int, struct lookup_intent *); - extern int filp_close(struct file *, fl_owner_t id); - extern char * getname(const char __user *); - -Index: linux-2.6.6-vanilla/include/linux/namei.h 
-=================================================================== ---- linux-2.6.6-vanilla.orig/include/linux/namei.h 2004-05-26 20:26:11.000000000 +0300 -+++ linux-2.6.6-vanilla/include/linux/namei.h 2004-06-29 12:23:41.852418808 +0300 -@@ -2,25 +2,55 @@ - #define _LINUX_NAMEI_H - - #include -+#include - - struct vfsmount; -+struct nameidata; - --struct open_intent { -- int flags; -- int create_mode; -+/* intent opcodes */ -+#define IT_OPEN (1) -+#define IT_CREAT (1<<1) -+#define IT_READDIR (1<<2) -+#define IT_GETATTR (1<<3) -+#define IT_LOOKUP (1<<4) -+#define IT_UNLINK (1<<5) -+#define IT_TRUNC (1<<6) -+#define IT_GETXATTR (1<<7) -+ -+struct lustre_intent_data { -+ int it_disposition; -+ int it_status; -+ __u64 it_lock_handle; -+ void *it_data; -+ int it_lock_mode; - }; - -+#define INTENT_MAGIC 0x19620323 -+struct lookup_intent { -+ int it_magic; -+ void (*it_op_release)(struct lookup_intent *); -+ int it_op; -+ int it_flags; -+ int it_create_mode; -+ union { -+ struct lustre_intent_data lustre; -+ } d; -+}; -+ -+static inline void intent_init(struct lookup_intent *it, int op) -+{ -+ memset(it, 0, sizeof(*it)); -+ it->it_magic = INTENT_MAGIC; -+ it->it_op = op; -+} -+ - struct nameidata { - struct dentry *dentry; - struct vfsmount *mnt; - struct qstr last; - unsigned int flags; - int last_type; -- -- /* Intent data */ -- union { -- struct open_intent open; -- } intent; -+ struct lookup_intent intent; - }; - - /* -@@ -41,6 +71,9 @@ - #define LOOKUP_CONTINUE 4 - #define LOOKUP_PARENT 16 - #define LOOKUP_NOALT 32 -+#define LOOKUP_LAST (1<<6) -+#define LOOKUP_LINK_NOTLAST (1<<7) -+ - /* - * Intent data - */ -@@ -49,6 +82,12 @@ - #define LOOKUP_ACCESS (0x0400) - - extern int FASTCALL(__user_walk(const char __user *, unsigned, struct nameidata *)); -+extern int FASTCALL(__user_walk_it(const char __user *name, unsigned flags, struct nameidata *nd)); -+#define user_path_walk_it(name,nd) \ -+ __user_walk_it(name, LOOKUP_FOLLOW, nd) -+#define 
user_path_walk_link_it(name,nd) \ -+ __user_walk_it(name, 0, nd) -+extern void intent_release(struct lookup_intent *); - #define user_path_walk(name,nd) \ - __user_walk(name, LOOKUP_FOLLOW, nd) - #define user_path_walk_link(name,nd) \ -@@ -60,7 +99,6 @@ - - extern struct dentry * lookup_one_len(const char *, struct dentry *, int); - extern struct dentry * lookup_hash(struct qstr *, struct dentry *); -- - extern int follow_down(struct vfsmount **, struct dentry **); - extern int follow_up(struct vfsmount **, struct dentry **); - -Index: linux-2.6.6-vanilla/kernel/exit.c -=================================================================== ---- linux-2.6.6-vanilla.orig/kernel/exit.c 2004-05-26 20:26:14.000000000 +0300 -+++ linux-2.6.6-vanilla/kernel/exit.c 2004-06-29 12:23:41.854418504 +0300 -@@ -254,6 +254,8 @@ - write_unlock_irq(&tasklist_lock); - } - -+EXPORT_SYMBOL(reparent_to_init); -+ - void __set_special_pids(pid_t session, pid_t pgrp) - { - struct task_struct *curr = current; -@@ -434,6 +436,8 @@ - __exit_files(tsk); - } - -+EXPORT_SYMBOL(exit_files); -+ - static inline void __put_fs_struct(struct fs_struct *fs) - { - /* No need to hold fs->lock if we are killing it */ diff --git a/lustre/kernel_patches/patches/vfs_nointent-2.6-vanilla.patch b/lustre/kernel_patches/patches/vfs_nointent-2.6-vanilla.patch deleted file mode 100644 index ea081d6..0000000 --- a/lustre/kernel_patches/patches/vfs_nointent-2.6-vanilla.patch +++ /dev/null @@ -1,485 +0,0 @@ - 0 files changed - -.old..........pc/vfs_nointent_2.6.0-suse/fs/namei.c -.new.........fs/namei.c -Index: linux-2.6.4-51.0/fs/namei.c -=================================================================== ---- linux-2.6.4-51.0.orig/fs/namei.c 2004-04-05 17:36:42.000000000 -0400 -+++ linux-2.6.4-51.0/fs/namei.c 2004-04-05 17:36:43.000000000 -0400 -@@ -1276,7 +1276,7 @@ - if (!error) { - DQUOT_INIT(inode); - -- error = do_truncate(dentry, 0); -+ error = do_truncate(dentry, 0, 1); - } - put_write_access(inode); - if 
(error) -@@ -1526,6 +1526,7 @@ - char * tmp; - struct dentry * dentry; - struct nameidata nd; -+ intent_init(&nd.intent, IT_LOOKUP); - - if (S_ISDIR(mode)) - return -EPERM; -@@ -1536,6 +1537,15 @@ - error = path_lookup(tmp, LOOKUP_PARENT, &nd); - if (error) - goto out; -+ -+ if (nd.dentry->d_inode->i_op->mknod_raw) { -+ struct inode_operations *op = nd.dentry->d_inode->i_op; -+ error = op->mknod_raw(&nd, mode, dev); -+ /* the file system wants to use normal vfs path now */ -+ if (error != -EOPNOTSUPP) -+ goto out2; -+ } -+ - dentry = lookup_create(&nd, 0); - error = PTR_ERR(dentry); - -@@ -1562,6 +1572,7 @@ - dput(dentry); - } - up(&nd.dentry->d_inode->i_sem); -+out2: - path_release(&nd); - out: - putname(tmp); -@@ -1603,10 +1614,20 @@ - if (!IS_ERR(tmp)) { - struct dentry *dentry; - struct nameidata nd; -+ intent_init(&nd.intent, IT_LOOKUP); - - error = path_lookup(tmp, LOOKUP_PARENT, &nd); - if (error) - goto out; -+ -+ if (nd.dentry->d_inode->i_op->mkdir_raw) { -+ struct inode_operations *op = nd.dentry->d_inode->i_op; -+ error = op->mkdir_raw(&nd, mode); -+ /* the file system wants to use normal vfs path now */ -+ if (error != -EOPNOTSUPP) -+ goto out2; -+ } -+ - dentry = lookup_create(&nd, 1); - error = PTR_ERR(dentry); - if (!IS_ERR(dentry)) { -@@ -1616,6 +1635,7 @@ - dput(dentry); - } - up(&nd.dentry->d_inode->i_sem); -+out2: - path_release(&nd); - out: - putname(tmp); -@@ -1696,6 +1716,7 @@ - char * name; - struct dentry *dentry; - struct nameidata nd; -+ intent_init(&nd.intent, IT_LOOKUP); - - name = getname(pathname); - if(IS_ERR(name)) -@@ -1716,6 +1737,16 @@ - error = -EBUSY; - goto exit1; - } -+ -+ if (nd.dentry->d_inode->i_op->rmdir_raw) { -+ struct inode_operations *op = nd.dentry->d_inode->i_op; -+ -+ error = op->rmdir_raw(&nd); -+ /* the file system wants to use normal vfs path now */ -+ if (error != -EOPNOTSUPP) -+ goto exit1; -+ } -+ - down(&nd.dentry->d_inode->i_sem); - dentry = lookup_hash(&nd.last, nd.dentry); - error = PTR_ERR(dentry); -@@ 
-1774,6 +1805,7 @@ - struct dentry *dentry; - struct nameidata nd; - struct inode *inode = NULL; -+ intent_init(&nd.intent, IT_LOOKUP); - - name = getname(pathname); - if(IS_ERR(name)) -@@ -1785,6 +1817,13 @@ - error = -EISDIR; - if (nd.last_type != LAST_NORM) - goto exit1; -+ if (nd.dentry->d_inode->i_op->unlink_raw) { -+ struct inode_operations *op = nd.dentry->d_inode->i_op; -+ error = op->unlink_raw(&nd); -+ /* the file system wants to use normal vfs path now */ -+ if (error != -EOPNOTSUPP) -+ goto exit1; -+ } - down(&nd.dentry->d_inode->i_sem); - dentry = lookup_hash(&nd.last, nd.dentry); - error = PTR_ERR(dentry); -@@ -1852,10 +1891,18 @@ - if (!IS_ERR(to)) { - struct dentry *dentry; - struct nameidata nd; -+ intent_init(&nd.intent, IT_LOOKUP); - - error = path_lookup(to, LOOKUP_PARENT, &nd); - if (error) - goto out; -+ if (nd.dentry->d_inode->i_op->symlink_raw) { -+ struct inode_operations *op = nd.dentry->d_inode->i_op; -+ error = op->symlink_raw(&nd, from); -+ /* the file system wants to use normal vfs path now */ -+ if (error != -EOPNOTSUPP) -+ goto out2; -+ } - dentry = lookup_create(&nd, 0); - error = PTR_ERR(dentry); - if (!IS_ERR(dentry)) { -@@ -1863,6 +1910,7 @@ - dput(dentry); - } - up(&nd.dentry->d_inode->i_sem); -+out2: - path_release(&nd); - out: - putname(to); -@@ -1926,6 +1974,8 @@ - struct nameidata nd, old_nd; - int error; - char * to; -+ intent_init(&nd.intent, IT_LOOKUP); -+ intent_init(&old_nd.intent, IT_LOOKUP); - - to = getname(newname); - if (IS_ERR(to)) -@@ -1940,6 +1990,13 @@ - error = -EXDEV; - if (old_nd.mnt != nd.mnt) - goto out_release; -+ if (nd.dentry->d_inode->i_op->link_raw) { -+ struct inode_operations *op = nd.dentry->d_inode->i_op; -+ error = op->link_raw(&old_nd, &nd); -+ /* the file system wants to use normal vfs path now */ -+ if (error != -EOPNOTSUPP) -+ goto out_release; -+ } - new_dentry = lookup_create(&nd, 0); - error = PTR_ERR(new_dentry); - if (!IS_ERR(new_dentry)) { -@@ -2112,6 +2169,8 @@ - struct dentry * 
old_dentry, *new_dentry; - struct dentry * trap; - struct nameidata oldnd, newnd; -+ intent_init(&oldnd.intent, IT_LOOKUP); -+ intent_init(&newnd.intent, IT_LOOKUP); - - error = path_lookup(oldname, LOOKUP_PARENT, &oldnd); - if (error) -@@ -2134,6 +2193,13 @@ - if (newnd.last_type != LAST_NORM) - goto exit2; - -+ if (old_dir->d_inode->i_op->rename_raw) { -+ error = old_dir->d_inode->i_op->rename_raw(&oldnd, &newnd); -+ /* the file system wants to use normal vfs path now */ -+ if (error != -EOPNOTSUPP) -+ goto exit2; -+ } -+ - trap = lock_rename(new_dir, old_dir); - - old_dentry = lookup_hash(&oldnd.last, old_dir); -@@ -2165,8 +2231,7 @@ - if (new_dentry == trap) - goto exit5; - -- error = vfs_rename(old_dir->d_inode, old_dentry, -- new_dir->d_inode, new_dentry); -+ error = vfs_rename(old_dir->d_inode, old_dentry, new_dir->d_inode, new_dentry); - exit5: - dput(new_dentry); - exit4: -Index: linux-2.6.4-51.0/fs/open.c -=================================================================== ---- linux-2.6.4-51.0.orig/fs/open.c 2004-04-05 17:36:42.000000000 -0400 -+++ linux-2.6.4-51.0/fs/open.c 2004-04-06 01:37:39.000000000 -0400 -@@ -187,9 +187,10 @@ - return error; - } - --int do_truncate(struct dentry *dentry, loff_t length) -+int do_truncate(struct dentry *dentry, loff_t length, int called_from_open) - { - int err; -+ struct inode_operations *op = dentry->d_inode->i_op; - struct iattr newattrs; - - /* Not pretty: "inode->i_size" shouldn't really be signed. But it is. 
*/ -@@ -200,7 +201,14 @@ - newattrs.ia_valid = ATTR_SIZE | ATTR_CTIME; - down(&dentry->d_inode->i_sem); - down_write(&dentry->d_inode->i_alloc_sem); -- err = notify_change(dentry, &newattrs); -+ if (called_from_open) -+ newattrs.ia_valid |= ATTR_FROM_OPEN; -+ if (op->setattr_raw) { -+ newattrs.ia_valid |= ATTR_RAW; -+ newattrs.ia_ctime = CURRENT_TIME; -+ err = op->setattr_raw(dentry->d_inode, &newattrs); -+ } else -+ err = notify_change(dentry, &newattrs); - up_write(&dentry->d_inode->i_alloc_sem); - up(&dentry->d_inode->i_sem); - return err; -@@ -256,7 +264,7 @@ - error = locks_verify_truncate(inode, NULL, length); - if (!error) { - DQUOT_INIT(inode); -- error = do_truncate(nd.dentry, length); -+ error = do_truncate(nd.dentry, length, 0); - } - put_write_access(inode); - -@@ -308,7 +316,7 @@ - - error = locks_verify_truncate(inode, file, length); - if (!error) -- error = do_truncate(dentry, length); -+ error = do_truncate(dentry, length, 0); - out_putf: - fput(file); - out: -@@ -387,9 +395,19 @@ - (error = permission(inode,MAY_WRITE,&nd)) != 0) - goto dput_and_out; - } -- down(&inode->i_sem); -- error = notify_change(nd.dentry, &newattrs); -- up(&inode->i_sem); -+ if (inode->i_op->setattr_raw) { -+ struct inode_operations *op = nd.dentry->d_inode->i_op; -+ -+ newattrs.ia_valid |= ATTR_RAW; -+ error = op->setattr_raw(inode, &newattrs); -+ /* the file system wants to use the normal vfs path now */ -+ if (error != -EOPNOTSUPP) -+ goto dput_and_out; -+ } else { -+ down(&inode->i_sem); -+ error = notify_change(nd.dentry, &newattrs); -+ up(&inode->i_sem); -+ } - dput_and_out: - path_release(&nd); - out: -@@ -440,9 +458,19 @@ - (error = permission(inode,MAY_WRITE,&nd)) != 0) - goto dput_and_out; - } -- down(&inode->i_sem); -- error = notify_change(nd.dentry, &newattrs); -- up(&inode->i_sem); -+ if (inode->i_op->setattr_raw) { -+ struct inode_operations *op = nd.dentry->d_inode->i_op; -+ -+ newattrs.ia_valid |= ATTR_RAW; -+ error = op->setattr_raw(inode, &newattrs); -+ /* 
the file system wants to use normal vfs path now */ -+ if (error != -EOPNOTSUPP) -+ goto dput_and_out; -+ } else { -+ down(&inode->i_sem); -+ error = notify_change(nd.dentry, &newattrs); -+ up(&inode->i_sem); -+ } - dput_and_out: - path_release(&nd); - out: -@@ -592,36 +620,52 @@ - return error; - } - --asmlinkage long sys_fchmod(unsigned int fd, mode_t mode) -+int chmod_common(struct dentry *dentry, mode_t mode) - { -- struct inode * inode; -- struct dentry * dentry; -- struct file * file; -- int err = -EBADF; -+ struct inode * inode = dentry->d_inode; - struct iattr newattrs; -+ int error = -EROFS; - -- file = fget(fd); -- if (!file) -+ if (IS_RDONLY(inode)) - goto out; -+ -+ if (inode->i_op->setattr_raw) { -+ struct inode_operations *op = dentry->d_inode->i_op; - -- dentry = file->f_dentry; -- inode = dentry->d_inode; -+ newattrs.ia_mode = mode; -+ newattrs.ia_valid = ATTR_MODE | ATTR_CTIME; -+ newattrs.ia_valid |= ATTR_RAW; -+ error = op->setattr_raw(inode, &newattrs); -+ /* the file system wants to use normal vfs path now */ -+ if (error != -EOPNOTSUPP) -+ goto out; -+ } - -- err = -EROFS; -- if (IS_RDONLY(inode)) -- goto out_putf; -- err = -EPERM; -+ error = -EPERM; - if (IS_IMMUTABLE(inode) || IS_APPEND(inode)) -- goto out_putf; -+ goto out; -+ - down(&inode->i_sem); - if (mode == (mode_t) -1) - mode = inode->i_mode; - newattrs.ia_mode = (mode & S_IALLUGO) | (inode->i_mode & ~S_IALLUGO); - newattrs.ia_valid = ATTR_MODE | ATTR_CTIME; -- err = notify_change(dentry, &newattrs); -+ error = notify_change(dentry, &newattrs); - up(&inode->i_sem); -+out: -+ return error; -+} - --out_putf: -+asmlinkage long sys_fchmod(unsigned int fd, mode_t mode) -+{ -+ struct file * file; -+ int err = -EBADF; -+ -+ file = fget(fd); -+ if (!file) -+ goto out; -+ -+ err = chmod_common(file->f_dentry, mode); - fput(file); - out: - return err; -@@ -630,32 +674,13 @@ - asmlinkage long sys_chmod(const char __user * filename, mode_t mode) - { - struct nameidata nd; -- struct inode * 
inode; - int error; -- struct iattr newattrs; - - error = user_path_walk(filename, &nd); - if (error) - goto out; -- inode = nd.dentry->d_inode; -- -- error = -EROFS; -- if (IS_RDONLY(inode)) -- goto dput_and_out; -- -- error = -EPERM; -- if (IS_IMMUTABLE(inode) || IS_APPEND(inode)) -- goto dput_and_out; -- -- down(&inode->i_sem); -- if (mode == (mode_t) -1) -- mode = inode->i_mode; -- newattrs.ia_mode = (mode & S_IALLUGO) | (inode->i_mode & ~S_IALLUGO); -- newattrs.ia_valid = ATTR_MODE | ATTR_CTIME; -- error = notify_change(nd.dentry, &newattrs); -- up(&inode->i_sem); - --dput_and_out: -+ error = chmod_common(nd.dentry, mode); - path_release(&nd); - out: - return error; -@@ -676,6 +701,18 @@ - if (IS_RDONLY(inode)) - goto out; - error = -EPERM; -+ if (inode->i_op->setattr_raw) { -+ struct inode_operations *op = dentry->d_inode->i_op; -+ -+ newattrs.ia_uid = user; -+ newattrs.ia_gid = group; -+ newattrs.ia_valid = ATTR_UID | ATTR_GID; -+ newattrs.ia_valid |= ATTR_RAW; -+ error = op->setattr_raw(inode, &newattrs); -+ /* the file system wants to use normal vfs path now */ -+ if (error != -EOPNOTSUPP) -+ return error; -+ } - if (IS_IMMUTABLE(inode) || IS_APPEND(inode)) - goto out; - newattrs.ia_valid = ATTR_CTIME; -Index: linux-2.6.4-51.0/fs/exec.c -=================================================================== ---- linux-2.6.4-51.0.orig/fs/exec.c 2004-04-05 17:36:42.000000000 -0400 -+++ linux-2.6.4-51.0/fs/exec.c 2004-04-05 17:36:43.000000000 -0400 -@@ -1418,7 +1418,7 @@ - goto close_fail; - if (!file->f_op->write) - goto close_fail; -- if (do_truncate(file->f_dentry, 0) != 0) -+ if (do_truncate(file->f_dentry, 0, 0) != 0) - goto close_fail; - - retval = binfmt->core_dump(signr, regs, file); -Index: linux-2.6.4-51.0/include/linux/fs.h -=================================================================== ---- linux-2.6.4-51.0.orig/include/linux/fs.h 2004-04-05 17:36:43.000000000 -0400 -+++ linux-2.6.4-51.0/include/linux/fs.h 2004-04-05 17:36:43.000000000 -0400 -@@ 
-866,13 +866,20 @@ - int (*create) (struct inode *,struct dentry *,int, struct nameidata *); - struct dentry * (*lookup) (struct inode *,struct dentry *, struct nameidata *); - int (*link) (struct dentry *,struct inode *,struct dentry *); -+ int (*link_raw) (struct nameidata *,struct nameidata *); - int (*unlink) (struct inode *,struct dentry *); -+ int (*unlink_raw) (struct nameidata *); - int (*symlink) (struct inode *,struct dentry *,const char *); -+ int (*symlink_raw) (struct nameidata *,const char *); - int (*mkdir) (struct inode *,struct dentry *,int); -+ int (*mkdir_raw) (struct nameidata *,int); - int (*rmdir) (struct inode *,struct dentry *); -+ int (*rmdir_raw) (struct nameidata *); - int (*mknod) (struct inode *,struct dentry *,int,dev_t); -+ int (*mknod_raw) (struct nameidata *,int,dev_t); - int (*rename) (struct inode *, struct dentry *, - struct inode *, struct dentry *); -+ int (*rename_raw) (struct nameidata *, struct nameidata *); - int (*readlink) (struct dentry *, char __user *,int); - int (*follow_link) (struct dentry *, struct nameidata *); - void (*truncate) (struct inode *); -@@ -1169,7 +1176,7 @@ - - /* fs/open.c */ - --extern int do_truncate(struct dentry *, loff_t start); -+extern int do_truncate(struct dentry *, loff_t start, int called_from_open); - extern struct file *filp_open(const char *, int, int); - extern struct file * dentry_open(struct dentry *, struct vfsmount *, int); - extern struct file * dentry_open_it(struct dentry *, struct vfsmount *, int, struct lookup_intent *); -Index: linux-2.6.4-51.0/net/unix/af_unix.c -=================================================================== ---- linux-2.6.4-51.0.orig/net/unix/af_unix.c 2004-04-05 12:42:07.000000000 -0400 -+++ linux-2.6.4-51.0/net/unix/af_unix.c 2004-04-05 17:36:43.000000000 -0400 -@@ -676,6 +676,7 @@ - int err = 0; - - if (sunname->sun_path[0]) { -+ intent_init(&nd.intent, IT_LOOKUP); - err = path_lookup(sunname->sun_path, LOOKUP_FOLLOW, &nd); - if (err) - goto fail; 
diff --git a/lustre/kernel_patches/patches/vfs_races-2.6-vanilla.patch b/lustre/kernel_patches/patches/vfs_races-2.6-vanilla.patch deleted file mode 100644 index ccca64d..0000000 --- a/lustre/kernel_patches/patches/vfs_races-2.6-vanilla.patch +++ /dev/null @@ -1,65 +0,0 @@ -Index: linux-2.6.7-vanilla/fs/dcache.c -=================================================================== ---- linux-2.6.7-vanilla.orig/fs/dcache.c 2004-07-01 12:09:19.000000000 +0300 -+++ linux-2.6.7-vanilla/fs/dcache.c 2004-07-01 12:29:12.510193264 +0300 -@@ -219,7 +219,14 @@ - spin_unlock(&dcache_lock); - return 0; - } -- /* -+ -+ /* network invalidation by Lustre */ -+ if (dentry->d_flags & DCACHE_LUSTRE_INVALID) { -+ spin_unlock(&dcache_lock); -+ return 0; -+ } -+ -+ /* - * Check whether to do a partial shrink_dcache - * to get rid of unused child entries. - */ -@@ -1114,19 +1121,28 @@ - * Adds a dentry to the hash according to its name. - */ - --void d_rehash(struct dentry * entry) -+void __d_rehash(struct dentry * entry, int lock) - { - struct hlist_head *list = d_hash(entry->d_parent, entry->d_name.hash); - -- spin_lock(&dcache_lock); -+ if (lock) -+ spin_lock(&dcache_lock); - spin_lock(&entry->d_lock); - entry->d_flags &= ~DCACHE_UNHASHED; - spin_unlock(&entry->d_lock); - entry->d_bucket = list; - hlist_add_head_rcu(&entry->d_hash, list); -- spin_unlock(&dcache_lock); -+ if (lock) -+ spin_unlock(&dcache_lock); - } - -+EXPORT_SYMBOL(__d_rehash); -+ -+void d_rehash(struct dentry * entry) -+{ -+ __d_rehash(entry, 1); -+ } -+ - #define do_switch(x,y) do { \ - __typeof__ (x) __tmp = x; \ - x = y; y = __tmp; } while (0) -Index: linux-2.6.7-vanilla/include/linux/dcache.h -=================================================================== ---- linux-2.6.7-vanilla.orig/include/linux/dcache.h 2004-07-01 12:24:53.602553208 +0300 -+++ linux-2.6.7-vanilla/include/linux/dcache.h 2004-07-01 12:27:29.757814000 +0300 -@@ -159,6 +159,8 @@ - - #define DCACHE_REFERENCED 0x0008 /* Recently used, don't 
discard. */ - #define DCACHE_UNHASHED 0x0010 -+#define DCACHE_LUSTRE_INVALID 0x0020 /* Lustre invalidated */ -+ - - extern spinlock_t dcache_lock; - diff --git a/lustre/kernel_patches/series/2.6-suse.series b/lustre/kernel_patches/series/2.6-suse.series index 06c175f..a30d9f1 100644 --- a/lustre/kernel_patches/series/2.6-suse.series +++ b/lustre/kernel_patches/series/2.6-suse.series @@ -12,3 +12,4 @@ export-2.6-suse.patch header-guards-2.6-suse.patch md_path_lookup-2.6-suse.patch ext3-super-ntohl.patch +export-show_task-2.6-vanilla.patch diff --git a/lustre/kernel_patches/series/2.6-vanilla.series b/lustre/kernel_patches/series/2.6-vanilla.series deleted file mode 100644 index 7fa0d34..0000000 --- a/lustre/kernel_patches/series/2.6-vanilla.series +++ /dev/null @@ -1,15 +0,0 @@ -uml-2.6.7-01-bb2.patch -lustre_version.patch -vfs_intent-2.6-vanilla.patch -vfs_nointent-2.6-vanilla.patch -vfs_races-2.6-vanilla.patch -ext3-wantedi-misc-2.6-suse.patch -nfs-cifs-intent-2.6-vanilla.patch -iopen-misc-2.6-suse.patch -export-truncate-2.6-suse.patch -export_symbols-2.6-suse.patch -dev_read_only-2.6-suse.patch -export-2.6-suse.patch -header-guards-2.6-suse.patch -lookup_bdev_init_intent.patch -ext3-super-ntohl.patch diff --git a/lustre/kernel_patches/series/chaos-2.4.21 b/lustre/kernel_patches/series/chaos-2.4.21 index 13a20cc..f455f13 100644 --- a/lustre/kernel_patches/series/chaos-2.4.21 +++ b/lustre/kernel_patches/series/chaos-2.4.21 @@ -30,7 +30,6 @@ nfs_export_kernel-2.4.21-chaos.patch ext3-ea-in-inode-2.4.21-chaos.patch listman-2.4.21-chaos.patch ext3-xattr-ptr-arith-fix.patch -kernel_text_address-2.4.18-chaos.patch pagecache-lock-2.4.21-chaos.patch ext3-truncate-buffer-head.patch inode-max-readahead-2.4.24.patch diff --git a/lustre/kernel_patches/series/hp-pnnl-2.4.20 b/lustre/kernel_patches/series/hp-pnnl-2.4.20 index 033119a..de4e0e1 100644 --- a/lustre/kernel_patches/series/hp-pnnl-2.4.20 +++ b/lustre/kernel_patches/series/hp-pnnl-2.4.20 @@ -47,4 +47,3 @@ 
ext3-extents-2.4.24.patch ext3-extents-asyncdel-2.4.24.patch ext3-mballoc-2.4.24.patch ext3-nlinks-2.4.20-hp_pnnl.patch -export-zap-page-range.patch diff --git a/lustre/kernel_patches/series/ldiskfs-2.6-vanilla.series b/lustre/kernel_patches/series/ldiskfs-2.6-vanilla.series deleted file mode 100644 index 13cf85a..0000000 --- a/lustre/kernel_patches/series/ldiskfs-2.6-vanilla.series +++ /dev/null @@ -1,11 +0,0 @@ -ext3-wantedi-2.6-suse.patch -ext3-san-jdike-2.6-suse.patch -iopen-2.6-vanilla.patch -export_symbols-ext3-2.6-suse.patch -ext3-map_inode_page-2.6-suse.patch -ext3-ea-in-inode-2.6-suse.patch -export-ext3-2.6-suse.patch -ext3-include-fixes-2.6-suse.patch -ext3-extents-2.6.7.patch -ext3-mballoc2-2.6.7.patch -ext3-nlinks-2.6.7.patch diff --git a/lustre/kernel_patches/series/rh-2.4.20 b/lustre/kernel_patches/series/rh-2.4.20 index bbb327d..68583ba 100644 --- a/lustre/kernel_patches/series/rh-2.4.20 +++ b/lustre/kernel_patches/series/rh-2.4.20 @@ -39,7 +39,6 @@ ext3-ea-in-inode-2.4.20.patch listman-2.4.20.patch ext3-trusted_ea-2.4.20.patch netconsole-2.4.20-rh.patch -kernel_text_address-2.4.20-rh.patch ext3-xattr-ptr-arith-fix.patch procfs-ndynamic-2.4.patch ext3-truncate-buffer-head.patch @@ -49,3 +48,4 @@ ext3-extents-2.4.20-rh.patch ext3-extents-asyncdel-2.4.20-rh.patch ext3-mballoc-2.4.24.patch x86-fpu-crash.patch +export-show_task-2.4-rh.patch diff --git a/lustre/kernel_patches/series/rh-2.4.22 b/lustre/kernel_patches/series/rh-2.4.22 index 394443b..2e2fa8b 100644 --- a/lustre/kernel_patches/series/rh-2.4.22 +++ b/lustre/kernel_patches/series/rh-2.4.22 @@ -27,5 +27,4 @@ listman-2.4.20.patch ext3-trusted_ea-2.4.20.patch ext3-xattr-ptr-arith-fix.patch procfs-ndynamic-2.4.patch -kernel_text_address-2.4.18-chaos.patch ext3-truncate-buffer-head.patch diff --git a/lustre/kernel_patches/series/rhel-2.4.21 b/lustre/kernel_patches/series/rhel-2.4.21 index ae8bef7..bc6924a 100644 --- a/lustre/kernel_patches/series/rhel-2.4.21 +++ 
b/lustre/kernel_patches/series/rhel-2.4.21 @@ -30,10 +30,10 @@ nfs_export_kernel-2.4.21-chaos.patch ext3-ea-in-inode-2.4.21-chaos.patch listman-2.4.21-chaos.patch ext3-xattr-ptr-arith-fix.patch -kernel_text_address-2.4.18-chaos.patch pagecache-lock-2.4.21-chaos.patch ext3-truncate-buffer-head.patch inode-max-readahead-2.4.24.patch dcache_refcount_debug.patch blkdev_tunables-2.4.21-chaos.patch ext3-nlinks-2.4.21-chaos.patch +export-show_task-2.4-rhel.patch diff --git a/lustre/kernel_patches/series/suse-2.4.21-171 b/lustre/kernel_patches/series/suse-2.4.21-171 index 2922e0f..6a0919e 100644 --- a/lustre/kernel_patches/series/suse-2.4.21-171 +++ b/lustre/kernel_patches/series/suse-2.4.21-171 @@ -26,5 +26,4 @@ nfs_export_kernel-2.4.21-suse2.patch ext3-raw-lookup.patch ext3-ea-in-inode-2.4.21-suse2.patch listman-2.4.20.patch -kernel_text_address-2.4.20-vanilla.patch ext3-truncate-buffer-head.patch diff --git a/lustre/kernel_patches/series/suse-2.4.21-2 b/lustre/kernel_patches/series/suse-2.4.21-2 index 62a745a..27928ea 100644 --- a/lustre/kernel_patches/series/suse-2.4.21-2 +++ b/lustre/kernel_patches/series/suse-2.4.21-2 @@ -27,7 +27,6 @@ ext3-raw-lookup.patch ext3-ea-in-inode-2.4.21-suse2.patch listman-2.4.20.patch ext3-xattr-ptr-arith-fix.patch -kernel_text_address-2.4.20-vanilla.patch procfs-ndynamic-2.4.21-suse2.patch ext3-truncate-buffer-head.patch loop-sync-2.4.21-suse.patch diff --git a/lustre/kernel_patches/series/vanilla-2.4.24 b/lustre/kernel_patches/series/vanilla-2.4.24 index 8cbe061..735db03 100644 --- a/lustre/kernel_patches/series/vanilla-2.4.24 +++ b/lustre/kernel_patches/series/vanilla-2.4.24 @@ -30,7 +30,6 @@ ext3-raw-lookup.patch ext3-ea-in-inode-2.4.22-rh.patch listman-2.4.20.patch ext3-trusted_ea-2.4.20.patch -kernel_text_address-2.4.22-vanilla.patch ext3-xattr-ptr-arith-fix.patch 3.5G-address-space-2.4.22-vanilla.patch procfs-ndynamic-2.4.patch @@ -39,6 +38,6 @@ inode-max-readahead-2.4.24.patch ext3-extents-2.4.24.patch 
ext3-extents-asyncdel-2.4.24.patch ext3-mballoc-2.4.24.patch -export-zap-page-range.patch export_num_siblings.patch ext3-nlinks-2.4.24.patch +export-show_task-2.4-vanilla.patch diff --git a/lustre/ldlm/l_lock.c b/lustre/ldlm/l_lock.c index e07c25e..af6a15b 100644 --- a/lustre/ldlm/l_lock.c +++ b/lustre/ldlm/l_lock.c @@ -124,9 +124,7 @@ void l_check_no_ns_lock(struct ldlm_namespace *ns) if (l_has_lock(&ns->ns_lock) && time_after(jiffies, next_msg)) { CERROR("namespace %s lock held illegally; tell phil\n", ns->ns_name); -#if (LUSTRE_KERNEL_VERSION >= 30) - CERROR(portals_debug_dumpstack()); -#endif + portals_debug_dumpstack(NULL); next_msg = jiffies + 60 * HZ; } } diff --git a/lustre/ldlm/ldlm_flock.c b/lustre/ldlm/ldlm_flock.c index b521ff1..1dacf83 100644 --- a/lustre/ldlm/ldlm_flock.c +++ b/lustre/ldlm/ldlm_flock.c @@ -39,6 +39,9 @@ static struct list_head ldlm_flock_waitq = LIST_HEAD_INIT(ldlm_flock_waitq); +int ldlm_flock_blocking_ast(struct ldlm_lock *lock, struct ldlm_lock_desc *desc, + void *data, int flag); + /** * list_for_remaining_safe - iterate over the remaining entries in a list * and safeguard against removal of a list entry. 
@@ -75,6 +78,8 @@ ldlm_flock_destroy(struct ldlm_lock *lock, ldlm_mode_t mode, int flags) LDLM_DEBUG(lock, "ldlm_flock_destroy(mode: %d, flags: 0x%x)", mode, flags); + LASSERT(list_empty(&lock->l_flock_waitq)); + list_del_init(&lock->l_res_link); if (flags == LDLM_FL_WAIT_NOREPROC) { /* client side - set a flag to prevent sending a CANCEL */ @@ -125,6 +130,7 @@ ldlm_process_flock_lock(struct ldlm_lock *req, int *flags, int first_enq, struct ldlm_lock *new = req; struct ldlm_lock *new2 = NULL; ldlm_mode_t mode = req->l_req_mode; + int local = ns->ns_client; int added = (mode == LCK_NL); int overlaps = 0; ENTRY; @@ -136,8 +142,14 @@ ldlm_process_flock_lock(struct ldlm_lock *req, int *flags, int first_enq, *err = ELDLM_OK; - /* No blocking ASTs are sent for Posix file & record locks */ - req->l_blocking_ast = NULL; + if (local) { + /* No blocking ASTs are sent to the clients for + * Posix file & record locks */ + req->l_blocking_ast = NULL; + } else { + /* Called on the server for lock cancels. */ + req->l_blocking_ast = ldlm_flock_blocking_ast; + } if ((*flags == LDLM_FL_WAIT_NOREPROC) || (mode == LCK_NL)) { /* This loop determines where this processes locks start @@ -219,6 +231,10 @@ ldlm_process_flock_lock(struct ldlm_lock *req, int *flags, int first_enq, RETURN(LDLM_ITER_STOP); } + /* In case we had slept on this lock request take it off of the + * deadlock detection waitq. */ + list_del_init(&req->l_flock_waitq); + /* Scan the locks owned by this process that overlap this request. * We may have to merge or split existing locks. */ @@ -412,10 +428,13 @@ ldlm_flock_interrupted_wait(void *data) ENTRY; lock = ((struct ldlm_flock_wait_data *)data)->fwd_lock; + + /* take lock off the deadlock detection waitq. 
*/ + list_del_init(&lock->l_flock_waitq); + ldlm_lock_decref_internal(lock, lock->l_req_mode); ldlm_lock2handle(lock, &lockh); rc = ldlm_cli_cancel(&lockh); - CDEBUG(D_DLMTRACE, "ldlm_cli_cancel: %d\n", rc); EXIT; } @@ -450,7 +469,7 @@ ldlm_flock_completion_ast(struct ldlm_lock *lock, int flags, void *data) LDLM_DEBUG(lock, "client-side enqueue returned a blocked lock, " "sleeping"); - ldlm_lock_dump(D_OTHER, lock, 0); + ldlm_lock_dump(D_DLMTRACE, lock, 0); fwd.fwd_lock = lock; obd = class_exp2obd(lock->l_conn_export); @@ -486,7 +505,7 @@ granted: ns = lock->l_resource->lr_namespace; l_lock(&ns->ns_lock); - /* take data off of deadlock detection waitq. */ + /* take lock off the deadlock detection waitq. */ list_del_init(&lock->l_flock_waitq); /* ldlm_lock_enqueue() has already placed lock on the granted list. */ @@ -519,3 +538,21 @@ granted: l_unlock(&ns->ns_lock); RETURN(0); } + +int ldlm_flock_blocking_ast(struct ldlm_lock *lock, struct ldlm_lock_desc *desc, + void *data, int flag) +{ + struct ldlm_namespace *ns; + ENTRY; + + LASSERT(lock); + LASSERT(flag == LDLM_CB_CANCELING); + + ns = lock->l_resource->lr_namespace; + + /* take lock off the deadlock detection waitq. 
*/ + l_lock(&ns->ns_lock); + list_del_init(&lock->l_flock_waitq); + l_unlock(&ns->ns_lock); + RETURN(0); +} diff --git a/lustre/ldlm/ldlm_lockd.c b/lustre/ldlm/ldlm_lockd.c index 9446bfa..8f5e587 100644 --- a/lustre/ldlm/ldlm_lockd.c +++ b/lustre/ldlm/ldlm_lockd.c @@ -1301,7 +1301,7 @@ static int ldlm_setup(void) ldlm_state->ldlm_cb_service = ptlrpc_init_svc(LDLM_NBUFS, LDLM_BUFSIZE, LDLM_MAXREQSIZE, LDLM_CB_REQUEST_PORTAL, LDLM_CB_REPLY_PORTAL, - ldlm_callback_handler, "ldlm_cbd", + 1500, ldlm_callback_handler, "ldlm_cbd", ldlm_svc_proc_dir); if (!ldlm_state->ldlm_cb_service) { @@ -1312,7 +1312,7 @@ static int ldlm_setup(void) ldlm_state->ldlm_cancel_service = ptlrpc_init_svc(LDLM_NBUFS, LDLM_BUFSIZE, LDLM_MAXREQSIZE, LDLM_CANCEL_REQUEST_PORTAL, - LDLM_CANCEL_REPLY_PORTAL, + LDLM_CANCEL_REPLY_PORTAL, 30000, ldlm_cancel_handler, "ldlm_canceld", ldlm_svc_proc_dir); diff --git a/lustre/ldlm/ldlm_request.c b/lustre/ldlm/ldlm_request.c index 9b241f2..0981545 100644 --- a/lustre/ldlm/ldlm_request.c +++ b/lustre/ldlm/ldlm_request.c @@ -232,6 +232,7 @@ int ldlm_cli_enqueue(struct obd_export *exp, struct ldlm_reply *reply; int rc, size[2] = {sizeof(*body), lvb_len}, req_passed_in = 1; int is_replay = *flags & LDLM_FL_REPLAY; + int cleanup_phase = 0; ENTRY; if (exp == NULL) { @@ -253,7 +254,7 @@ int ldlm_cli_enqueue(struct obd_export *exp, lock = ldlm_lock_create(ns, NULL, res_id, type, mode, blocking, completion, glimpse, data, lvb_len); if (lock == NULL) - GOTO(out_nolock, rc = -ENOMEM); + RETURN(-ENOMEM); /* for the local lock, add the reference */ ldlm_lock_addref_internal(lock, mode); ldlm_lock2handle(lock, lockh); @@ -266,11 +267,14 @@ int ldlm_cli_enqueue(struct obd_export *exp, LDLM_DEBUG(lock, "client-side enqueue START"); } + /* lock not sent to server yet */ + cleanup_phase = 2; + if (req == NULL) { req = ptlrpc_prep_req(class_exp2cliimp(exp), LDLM_ENQUEUE, 1, size, NULL); if (req == NULL) - GOTO(out_lock, rc = -ENOMEM); + GOTO(cleanup, rc = -ENOMEM); 
req_passed_in = 0; } else if (req->rq_reqmsg->buflens[0] != sizeof(*body)) LBUG(); @@ -314,21 +318,24 @@ int ldlm_cli_enqueue(struct obd_export *exp, tmplvb = lustre_swab_repbuf(req, 1, lvb_len, lvb_swabber); if (tmplvb == NULL) - GOTO(out_lock, rc = -EPROTO); + GOTO(cleanup, rc = -EPROTO); if (lvb != NULL) memcpy(lvb, tmplvb, lvb_len); } } - GOTO(out_lock, rc); + GOTO(cleanup, rc); } reply = lustre_swab_repbuf(req, 0, sizeof(*reply), lustre_swab_ldlm_reply); if (reply == NULL) { CERROR("Can't unpack ldlm_reply\n"); - GOTO(out_lock, rc = -EPROTO); + GOTO(cleanup, rc = -EPROTO); } + /* lock enqueued on the server */ + cleanup_phase = 1; + memcpy(&lock->l_remote_handle, &reply->lock_handle, sizeof(lock->l_remote_handle)); *flags = reply->lock_flags; @@ -359,7 +366,7 @@ int ldlm_cli_enqueue(struct obd_export *exp, reply->lock_desc.l_resource.lr_name); if (lock->l_resource == NULL) { LBUG(); - GOTO(out_lock, rc = -ENOMEM); + GOTO(cleanup, rc = -ENOMEM); } LDLM_DEBUG(lock, "client-side enqueue, new resource"); } @@ -384,7 +391,7 @@ int ldlm_cli_enqueue(struct obd_export *exp, void *tmplvb; tmplvb = lustre_swab_repbuf(req, 1, lvb_len, lvb_swabber); if (tmplvb == NULL) - GOTO(out_lock, rc = -EPROTO); + GOTO(cleanup, rc = -EPROTO); memcpy(lock->l_lvb_data, tmplvb, lvb_len); } @@ -405,13 +412,17 @@ int ldlm_cli_enqueue(struct obd_export *exp, LDLM_DEBUG(lock, "client-side enqueue END"); EXIT; - out_lock: - if (rc) - failed_lock_cleanup(ns, lock, lockh, mode); - if (!req_passed_in && req != NULL) - ptlrpc_req_finished(req); +cleanup: + switch (cleanup_phase) { + case 2: + if (rc) + failed_lock_cleanup(ns, lock, lockh, mode); + case 1: + if (!req_passed_in && req != NULL) + ptlrpc_req_finished(req); + } + LDLM_LOCK_PUT(lock); - out_nolock: return rc; } diff --git a/lustre/llite/file.c b/lustre/llite/file.c index b0e16c3..5bd0eab 100644 --- a/lustre/llite/file.c +++ b/lustre/llite/file.c @@ -650,8 +650,8 @@ int ll_glimpse_size(struct inode *inode) inode->i_size = 
lov_merge_size(lli->lli_smd, 0); inode->i_blocks = lov_merge_blocks(lli->lli_smd); - LTIME_S(inode->i_mtime) = lov_merge_mtime(lli->lli_smd, - LTIME_S(inode->i_mtime)); + LTIME_S(inode->i_mtime) = + lov_merge_mtime(lli->lli_smd, LTIME_S(inode->i_mtime)); CDEBUG(D_DLMTRACE, "glimpse: size: %llu, blocks: %lu\n", inode->i_size, inode->i_blocks); @@ -701,10 +701,10 @@ int ll_extent_lock(struct ll_file_data *fd, struct inode *inode, inode->i_size = lov_merge_size(lsm, 1); up(&inode->i_sem); } - if (rc > 0) + + if (rc == 0) LTIME_S(inode->i_mtime) = lov_merge_mtime(lsm, LTIME_S(inode->i_mtime)); - RETURN(rc); } @@ -1226,12 +1226,21 @@ int ll_file_flock(struct file *file, int cmd, struct file_lock *file_lock) switch (cmd) { case F_SETLKW: +#ifdef F_SETLKW64 + case F_SETLKW64: +#endif flags = 0; break; case F_SETLK: +#ifdef F_SETLK64 + case F_SETLK64: +#endif flags = LDLM_FL_BLOCK_NOWAIT; break; case F_GETLK: +#ifdef F_GETLK64 + case F_GETLK64: +#endif flags = LDLM_FL_TEST_LOCK; /* Save the old mode so that if the mode in the lock changes we * can decrement the appropriate reader or writer refcount. 
*/ @@ -1247,7 +1256,8 @@ int ll_file_flock(struct file *file, int cmd, struct file_lock *file_lock) flags, mode, flock.l_flock.start, flock.l_flock.end); obddev = sbi->ll_mdc_exp->exp_obd; - rc = ldlm_cli_enqueue(sbi->ll_mdc_exp, NULL, obddev->obd_namespace, + rc = ldlm_cli_enqueue(obddev->obd_self_export, NULL, + obddev->obd_namespace, res_id, LDLM_FLOCK, &flock, mode, &flags, NULL, ldlm_flock_completion_ast, NULL, file_lock, NULL, 0, NULL, &lockh); @@ -1384,7 +1394,7 @@ struct file_operations ll_file_operations = { .sendfile = generic_file_sendfile, #endif .fsync = ll_fsync, - //.lock ll_file_flock + .lock = ll_file_flock }; struct inode_operations ll_file_inode_operations = { diff --git a/lustre/llite/llite_internal.h b/lustre/llite/llite_internal.h index e5352ed..b86e05b 100644 --- a/lustre/llite/llite_internal.h +++ b/lustre/llite/llite_internal.h @@ -134,10 +134,20 @@ struct ll_async_page { /* only trust these if the page lock is providing exclusion */ unsigned llap_write_queued:1, llap_defer_uptodate:1, + llap_origin:3, llap_ra_used:1; struct list_head llap_proc_item; }; +enum { + LLAP_ORIGIN_UNKNOWN = 0, + LLAP_ORIGIN_READPAGE, + LLAP_ORIGIN_READAHEAD, + LLAP_ORIGIN_COMMIT_WRITE, + LLAP_ORIGIN_WRITEPAGE, + LLAP__ORIGIN_MAX, +}; + /* llite/lproc_llite.c */ int lprocfs_register_mountpoint(struct proc_dir_entry *parent, struct super_block *sb, char *osc, char *mdc); @@ -163,12 +173,13 @@ void ll_prepare_mdc_op_data(struct mdc_op_data *, /* llite/rw.c */ int ll_prepare_write(struct file *, struct page *, unsigned from, unsigned to); int ll_commit_write(struct file *, struct page *, unsigned from, unsigned to); +int ll_writepage(struct page *page); void ll_inode_fill_obdo(struct inode *inode, int cmd, struct obdo *oa); void ll_ap_completion(void *data, int cmd, struct obdo *oa, int rc); void ll_removepage(struct page *page); int ll_readpage(struct file *file, struct page *page); struct ll_async_page *llap_from_cookie(void *cookie); -struct ll_async_page 
*llap_from_page(struct page *page); +struct ll_async_page *llap_from_page(struct page *page, unsigned origin); struct ll_async_page *llap_cast_private(struct page *page); void ll_readahead_init(struct inode *inode, struct ll_readahead_state *ras); void ll_ra_accounting(struct page *page, struct address_space *mapping); diff --git a/lustre/llite/llite_lib.c b/lustre/llite/llite_lib.c index c6e54b4..b785c87 100644 --- a/lustre/llite/llite_lib.c +++ b/lustre/llite/llite_lib.c @@ -196,7 +196,8 @@ int lustre_common_fill_super(struct super_block *sb, char *mdc, char *osc) /* making vm readahead 0 for 2.4.x. In the case of 2.6.x, backing dev info assigned to inode mapping is used for determining maximal readahead. */ -#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0)) +#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0)) && \ + !defined(KERNEL_HAS_AS_MAX_READAHEAD) /* bug 2805 - set VM readahead to zero */ vm_max_readahead = vm_min_readahead = 0; #endif @@ -508,6 +509,35 @@ out: RETURN(rc); } +static void lustre_manual_cleanup(struct ll_sb_info *sbi) +{ + struct lustre_cfg lcfg; + struct obd_device *obd; + int next = 0; + + while ((obd = class_devices_in_group(&sbi->ll_sb_uuid, &next)) != NULL) + { + int err; + + LCFG_INIT(lcfg, LCFG_CLEANUP, obd->obd_name); + err = class_process_config(&lcfg); + if (err) { + CERROR("cleanup failed: %s\n", obd->obd_name); + //continue; + } + + LCFG_INIT(lcfg, LCFG_DETACH, obd->obd_name); + err = class_process_config(&lcfg); + if (err) { + CERROR("detach failed: %s\n", obd->obd_name); + //continue; + } + } + + if (sbi->ll_lmd != NULL) + class_del_profile(sbi->ll_lmd->lmd_profile); +} + int lustre_fill_super(struct super_block *sb, void *data, int silent) { struct lustre_mount_data * lmd = data; @@ -618,8 +648,10 @@ out_free: err = lustre_process_log(sbi->ll_lmd, cln_prof, &cfg, 0); - if (err < 0) + if (err < 0) { CERROR("Unable to process log: %s\n", cln_prof); + lustre_manual_cleanup(sbi); + } OBD_FREE(cln_prof, len); OBD_FREE(sbi->ll_instance, 
strlen(sbi->ll_instance)+ 1); } @@ -630,35 +662,6 @@ out_free: goto out_dev; } /* lustre_fill_super */ -static void lustre_manual_cleanup(struct ll_sb_info *sbi) -{ - struct lustre_cfg lcfg; - struct obd_device *obd; - int next = 0; - - while ((obd = class_devices_in_group(&sbi->ll_sb_uuid, &next)) != NULL) - { - int err; - - LCFG_INIT(lcfg, LCFG_CLEANUP, obd->obd_name); - err = class_process_config(&lcfg); - if (err) { - CERROR("cleanup failed: %s\n", obd->obd_name); - //continue; - } - - LCFG_INIT(lcfg, LCFG_DETACH, obd->obd_name); - err = class_process_config(&lcfg); - if (err) { - CERROR("detach failed: %s\n", obd->obd_name); - //continue; - } - } - - if (sbi->ll_lmd != NULL) - class_del_profile(sbi->ll_lmd->lmd_profile); -} - void lustre_put_super(struct super_block *sb) { struct obd_device *obd; diff --git a/lustre/llite/lproc_llite.c b/lustre/llite/lproc_llite.c index d390eab..e09a703 100644 --- a/lustre/llite/lproc_llite.c +++ b/lustre/llite/lproc_llite.c @@ -494,8 +494,8 @@ static int llite_dump_pgcache_seq_show(struct seq_file *seq, void *v) /* 2.4 doesn't seem to have SEQ_START_TOKEN, so we implement * it in our own state */ if (dummy_llap->llap_magic == 0) { - seq_printf(seq, "generation | llap .cookie | page "); - seq_printf(seq, "inode .index [ page flags ]\n"); + seq_printf(seq, "generation | llap cookie origin | page "); + seq_printf(seq, "inode index count [ page flags ]\n"); return 0; } @@ -505,11 +505,23 @@ static int llite_dump_pgcache_seq_show(struct seq_file *seq, void *v) if (llap != NULL) { int has_flags = 0; struct page *page = llap->llap_page; - - seq_printf(seq, "%lu | %p %p | %p %p %lu [", - sbi->ll_pglist_gen, + static char *origins[] = { + [LLAP_ORIGIN_UNKNOWN] = "--", + [LLAP_ORIGIN_READPAGE] = "rp", + [LLAP_ORIGIN_READAHEAD] = "ra", + [LLAP_ORIGIN_COMMIT_WRITE] = "cw", + [LLAP_ORIGIN_WRITEPAGE] = "wp", + }; + + LASSERTF(llap->llap_origin < LLAP__ORIGIN_MAX, "%u\n", + llap->llap_origin); + + seq_printf(seq, "%lu | %p %p %s | %p %p %lu 
%u [", + sbi->ll_pglist_gen, llap, llap->llap_cookie, - page, page->mapping->host, page->index); + origins[llap->llap_origin], + page, page->mapping->host, page->index, + page_count(page)); seq_page_flag(seq, page, locked, has_flags); seq_page_flag(seq, page, error, has_flags); seq_page_flag(seq, page, referenced, has_flags); diff --git a/lustre/llite/rw.c b/lustre/llite/rw.c index bdc9b10..3d3c7ec 100644 --- a/lustre/llite/rw.c +++ b/lustre/llite/rw.c @@ -360,7 +360,7 @@ struct ll_async_page *llap_cast_private(struct page *page) } /* XXX have the exp be an argument? */ -struct ll_async_page *llap_from_page(struct page *page) +struct ll_async_page *llap_from_page(struct page *page, unsigned origin) { struct ll_async_page *llap; struct obd_export *exp; @@ -369,9 +369,11 @@ struct ll_async_page *llap_from_page(struct page *page) int rc; ENTRY; + LASSERTF(origin < LLAP__ORIGIN_MAX, "%u\n", origin); + llap = llap_cast_private(page); if (llap != NULL) - RETURN(llap); + GOTO(out, llap); exp = ll_i2obdexp(page->mapping->host); if (exp == NULL) @@ -400,6 +402,8 @@ struct ll_async_page *llap_from_page(struct page *page) list_add_tail(&llap->llap_proc_item, &sbi->ll_pglist); spin_unlock(&sbi->ll_lock); +out: + llap->llap_origin = origin; RETURN(llap); } @@ -492,7 +496,7 @@ int ll_commit_write(struct file *file, struct page *page, unsigned from, CDEBUG(D_INODE, "inode %p is writing page %p from %d to %d at %lu\n", inode, page, from, to, page->index); - llap = llap_from_page(page); + llap = llap_from_page(page, LLAP_ORIGIN_COMMIT_WRITE); if (IS_ERR(llap)) RETURN(PTR_ERR(llap)); @@ -640,7 +644,7 @@ void ll_removepage(struct page *page) return; } - llap = llap_from_page(page); + llap = llap_from_page(page, 0); if (IS_ERR(llap)) { CERROR("page %p ind %lu couldn't find llap: %ld\n", page, page->index, PTR_ERR(llap)); @@ -727,7 +731,7 @@ void ll_ra_accounting(struct page *page, struct address_space *mapping) { struct ll_async_page *llap; - llap = llap_from_page(page); + llap = 
llap_from_page(page, LLAP_ORIGIN_WRITEPAGE); if (IS_ERR(llap)) return; @@ -804,7 +808,7 @@ static int ll_readahead(struct ll_readahead_state *ras, /* we do this first so that we can see the page in the /proc * accounting */ - llap = llap_from_page(page); + llap = llap_from_page(page, LLAP_ORIGIN_READAHEAD); if (IS_ERR(llap) || llap->llap_defer_uptodate) goto next_page; @@ -983,7 +987,7 @@ int ll_readpage(struct file *filp, struct page *page) if (exp == NULL) GOTO(out, rc = -EINVAL); - llap = llap_from_page(page); + llap = llap_from_page(page, LLAP_ORIGIN_READPAGE); if (IS_ERR(llap)) GOTO(out, rc = PTR_ERR(llap)); diff --git a/lustre/llite/rw24.c b/lustre/llite/rw24.c index 3993af4..9c4e2a2 100644 --- a/lustre/llite/rw24.c +++ b/lustre/llite/rw24.c @@ -64,7 +64,7 @@ static int ll_writepage_24(struct page *page) if (exp == NULL) GOTO(out, rc = -EINVAL); - llap = llap_from_page(page); + llap = llap_from_page(page, LLAP_ORIGIN_WRITEPAGE); if (IS_ERR(llap)) GOTO(out, rc = PTR_ERR(llap)); @@ -177,6 +177,13 @@ static int ll_direct_IO_24(int rw, RETURN(rc); } +#ifdef KERNEL_HAS_AS_MAX_READAHEAD +static int ll_max_readahead(struct inode *inode) +{ + return 0; +} +#endif + struct address_space_operations ll_aops = { .readpage = ll_readpage, .direct_IO = ll_direct_IO_24, @@ -185,5 +192,8 @@ struct address_space_operations ll_aops = { .commit_write = ll_commit_write, .removepage = ll_removepage, .sync_page = NULL, - .bmap = NULL + .bmap = NULL, +#ifdef KERNEL_HAS_AS_MAX_READAHEAD + .max_readahead = ll_max_readahead, +#endif }; diff --git a/lustre/llite/rw26.c b/lustre/llite/rw26.c index 71964de..b585b09 100644 --- a/lustre/llite/rw26.c +++ b/lustre/llite/rw26.c @@ -66,7 +66,7 @@ static int ll_writepage_26(struct page *page, struct writeback_control *wbc) if (exp == NULL) GOTO(out, rc = -EINVAL); - llap = llap_from_page(page); + llap = llap_from_page(page, LLAP_ORIGIN_WRITEPAGE); if (IS_ERR(llap)) GOTO(out, rc = PTR_ERR(llap)); diff --git a/lustre/mds/handler.c 
b/lustre/mds/handler.c index 634cdac..be4d153 100644 --- a/lustre/mds/handler.c +++ b/lustre/mds/handler.c @@ -866,6 +866,10 @@ static int mds_statfs(struct ptlrpc_request *req) int rc, size = sizeof(struct obd_statfs); ENTRY; + /* This will trigger a watchdog timeout */ + OBD_FAIL_TIMEOUT(OBD_FAIL_MDS_STATFS_LCW_SLEEP, + (MDS_SERVICE_WATCHDOG_TIMEOUT / 1000) + 1); + rc = lustre_pack_reply(req, 1, &size, NULL); if (rc || OBD_FAIL_CHECK(OBD_FAIL_MDS_STATFS_PACK)) { CERROR("mds: statfs lustre_pack_reply failed: rc = %d\n", rc); @@ -1899,6 +1903,7 @@ static int mdt_setup(struct obd_device *obd, obd_count len, void *buf) mds->mds_service = ptlrpc_init_svc(MDS_NBUFS, MDS_BUFSIZE, MDS_MAXREQSIZE, MDS_REQUEST_PORTAL, MDC_REPLY_PORTAL, + MDS_SERVICE_WATCHDOG_TIMEOUT, mds_handle, "mds", obd->obd_proc_entry); if (!mds->mds_service) { @@ -1914,6 +1919,7 @@ static int mdt_setup(struct obd_device *obd, obd_count len, void *buf) mds->mds_setattr_service = ptlrpc_init_svc(MDS_NBUFS, MDS_BUFSIZE, MDS_MAXREQSIZE, MDS_SETATTR_PORTAL, MDC_REPLY_PORTAL, + MDS_SERVICE_WATCHDOG_TIMEOUT, mds_handle, "mds_setattr", obd->obd_proc_entry); if (!mds->mds_setattr_service) { @@ -1929,6 +1935,7 @@ static int mdt_setup(struct obd_device *obd, obd_count len, void *buf) mds->mds_readpage_service = ptlrpc_init_svc(MDS_NBUFS, MDS_BUFSIZE, MDS_MAXREQSIZE, MDS_READPAGE_PORTAL, MDC_REPLY_PORTAL, + MDS_SERVICE_WATCHDOG_TIMEOUT, mds_handle, "mds_readpage", obd->obd_proc_entry); if (!mds->mds_readpage_service) { diff --git a/lustre/mds/mds_internal.h b/lustre/mds/mds_internal.h index 892f315..c251d2f 100644 --- a/lustre/mds/mds_internal.h +++ b/lustre/mds/mds_internal.h @@ -7,6 +7,8 @@ #include +#define MDS_SERVICE_WATCHDOG_TIMEOUT 30000 + #define MAX_ATIME_DIFF 60 struct mds_filter_data { diff --git a/lustre/mgmt/mgmt_svc.c b/lustre/mgmt/mgmt_svc.c index 88fbf48..0d92b2f 100644 --- a/lustre/mgmt/mgmt_svc.c +++ b/lustre/mgmt/mgmt_svc.c @@ -89,10 +89,9 @@ static int mgmt_setup(struct obd_device *obd, 
obd_count len, void *buf) RETURN(-EALREADY); mgmt_service = - ptlrpc_init_svc(MGMT_NBUFS, MGMT_BUFSIZE, MGMT_MAXREQSIZE, - MGMT_REQUEST_PORTAL, MGMT_REPLY_PORTAL, - mgmt_handler, "mgmt", - obd->obd_proc_entry); + ptlrpc_init_svc(MGMT_NBUFS, MGMT_BUFSIZE, MGMT_MAXREQSIZE, + MGMT_REQUEST_PORTAL, MGMT_REPLY_PORTAL, 30000, + mgmt_handler, "mgmt", obd->obd_proc_entry); if (!mgmt_service) { CERROR("Failed to start mgmt service\n"); RETURN(-ENOMEM); diff --git a/lustre/obdclass/class_obd.c b/lustre/obdclass/class_obd.c index e61023c..4106977 100644 --- a/lustre/obdclass/class_obd.c +++ b/lustre/obdclass/class_obd.c @@ -646,7 +646,7 @@ static void cleanup_obdclass(void) * kernel patch */ #include #define LUSTRE_MIN_VERSION 32 -#define LUSTRE_MAX_VERSION 40 +#define LUSTRE_MAX_VERSION 42 #if (LUSTRE_KERNEL_VERSION < LUSTRE_MIN_VERSION) # error Cannot continue: Your Lustre kernel patch is older than the sources #elif (LUSTRE_KERNEL_VERSION > LUSTRE_MAX_VERSION) diff --git a/lustre/ost/ost_handler.c b/lustre/ost/ost_handler.c index 69449fe9..2f1ed8c 100644 --- a/lustre/ost/ost_handler.c +++ b/lustre/ost/ost_handler.c @@ -559,7 +559,6 @@ static int ost_brw_write(struct ptlrpc_request *req, struct obd_trans_info *oti) int objcount, niocount, npages; int comms_error = 0; int rc, swab, i, j; - struct timeval start; ENTRY; if (OBD_FAIL_CHECK(OBD_FAIL_OST_BRW_WRITE_BULK)) @@ -1139,7 +1138,7 @@ static int ost_setup(struct obd_device *obd, obd_count len, void *buf) ost->ost_service = ptlrpc_init_svc(OST_NBUFS, OST_BUFSIZE, OST_MAXREQSIZE, - OST_REQUEST_PORTAL, OSC_REPLY_PORTAL, + OST_REQUEST_PORTAL, OSC_REPLY_PORTAL, 30000, ost_handle, "ost", obd->obd_proc_entry); if (ost->ost_service == NULL) { @@ -1154,7 +1153,7 @@ static int ost_setup(struct obd_device *obd, obd_count len, void *buf) ost->ost_create_service = ptlrpc_init_svc(OST_NBUFS, OST_BUFSIZE, OST_MAXREQSIZE, - OST_CREATE_PORTAL, OSC_REPLY_PORTAL, + OST_CREATE_PORTAL, OSC_REPLY_PORTAL, 30000, ost_handle, "ost_create", 
obd->obd_proc_entry); if (ost->ost_create_service == NULL) { diff --git a/lustre/portals/archdep.m4 b/lustre/portals/archdep.m4 index 021fa68..ae2964d 100644 --- a/lustre/portals/archdep.m4 +++ b/lustre/portals/archdep.m4 @@ -720,6 +720,41 @@ if test x$enable_modules != xno ; then AC_MSG_RESULT([no]) ]) + # ------------ kallsyms (so software watchdogs produce useful stacks) + AC_MSG_CHECKING([if kallsyms is enabled]) + LUSTRE_MODULE_TRY_COMPILE( + [ + #include + ],[ + #ifndef CONFIG_KALLSYMS + #error CONFIG_KALLSYMS is not #defined + #endif + ],[ + AC_MSG_RESULT([yes]) + ],[ + AC_MSG_RESULT([no]) + if test "x$ARCH_UM" = "x" ; then + AC_MSG_ERROR([Lustre requires that CONFIG_KALLSYMS is enabled in your kernel.]) + fi + ]) + + # ------------ check for our show_task patch + AC_MSG_CHECKING([if kernel exports show_task]) + have_show_task=0 + for file in ksyms sched ; do + if grep -q "EXPORT_SYMBOL(show_task)" \ + "$LINUX/kernel/$file.c" 2>/dev/null ; then + have_show_task=1 + break + fi + done + if test x$have_show_task = x1 ; then + AC_DEFINE(HAVE_SHOW_TASK, 1, [show_task is exported]) + AC_MSG_RESULT(yes) + else + AC_MSG_RESULT(no) + fi + case $BACKINGFS in ext3) # --- Check that ext3 and ext3 xattr are enabled in the kernel diff --git a/lustre/portals/include/linux/kp30.h b/lustre/portals/include/linux/kp30.h index 4e24c71d..1162c1b 100644 --- a/lustre/portals/include/linux/kp30.h +++ b/lustre/portals/include/linux/kp30.h @@ -92,7 +92,7 @@ do { \ #define LBUG_WITH_LOC(file, func, line) \ do { \ CEMERG("LBUG\n"); \ - CERROR("STACK: %s\n", portals_debug_dumpstack()); \ + portals_debug_dumpstack(NULL); \ portals_debug_dumplog(); \ portals_run_lbug_upcall(file, func, line); \ set_task_state(current, TASK_UNINTERRUPTIBLE); \ @@ -243,9 +243,7 @@ extern struct prof_ent prof_ents[MAX_PROFS]; #endif /* PORTALS_PROFILING */ /* debug.c */ -extern spinlock_t stack_backtrace_lock; - -char *portals_debug_dumpstack(void); +void portals_debug_dumpstack(struct task_struct *tsk); 
void portals_run_upcall(char **argv); void portals_run_lbug_upcall(char * file, const char *fn, const int line); void portals_debug_dumplog(void); diff --git a/lustre/portals/include/linux/libcfs.h b/lustre/portals/include/linux/libcfs.h index e3d58dd..33abae9 100644 --- a/lustre/portals/include/linux/libcfs.h +++ b/lustre/portals/include/linux/libcfs.h @@ -370,6 +370,34 @@ int libcfs_deregister_ioctl(struct libcfs_ioctl_handler *hand); #endif -#define _LIBCFS_H +#ifdef __KERNEL__ +/* libcfs watchdogs */ +struct lc_watchdog; + +/* Just use the default handler (dumplog) */ +#define LC_WATCHDOG_DEFAULT_CB NULL + +/* Add a watchdog which fires after "time" milliseconds of delay. You have to + * touch it once to enable it. */ +struct lc_watchdog *lc_watchdog_add(int time, + void (*cb)(struct lc_watchdog *, + struct task_struct *, + void *), + void *data); + +/* Enables a watchdog and resets its timer. */ +void lc_watchdog_touch(struct lc_watchdog *lcw); + +/* Disable a watchdog; touch it to restart it. 
*/ +void lc_watchdog_disable(struct lc_watchdog *lcw); + +/* Clean up the watchdog */ +void lc_watchdog_delete(struct lc_watchdog *lcw); + +/* Dump a debug log */ +void lc_watchdog_dumplog(struct lc_watchdog *lcw, + struct task_struct *tsk, + void *data); +#endif /* __KERNEL__ */ #endif /* _LIBCFS_H */ diff --git a/lustre/portals/include/linux/portals_compat25.h b/lustre/portals/include/linux/portals_compat25.h index 5a43a45..e4831aa 100644 --- a/lustre/portals/include/linux/portals_compat25.h +++ b/lustre/portals/include/linux/portals_compat25.h @@ -27,8 +27,10 @@ # define USERMODEHELPER(path, argv, envp) \ call_usermodehelper(path, argv, envp, 1) # define RECALC_SIGPENDING recalc_sigpending() +# define CLEAR_SIGPENDING clear_tsk_thread_flag(current, \ + TIF_SIGPENDING) # define CURRENT_SECONDS get_seconds() -# define smp_num_cpus NR_CPUS +# define smp_num_cpus num_online_cpus() #elif defined(CONFIG_RH_2_4_20) /* RH 2.4.x */ @@ -40,11 +42,9 @@ # define USERMODEHELPER(path, argv, envp) \ call_usermodehelper(path, argv, envp) # define RECALC_SIGPENDING recalc_sigpending() +# define CLEAR_SIGPENDING (current->sigpending = 0) # define CURRENT_SECONDS CURRENT_TIME -# define kernel_text_address(addr) is_kernel_text_address(addr) -extern int is_kernel_text_address(unsigned long addr); - #else /* 2.4.x */ # define SIGNAL_MASK_LOCK(task, flags) \ @@ -54,11 +54,9 @@ extern int is_kernel_text_address(unsigned long addr); # define USERMODEHELPER(path, argv, envp) \ call_usermodehelper(path, argv, envp) # define RECALC_SIGPENDING recalc_sigpending(current) +# define CLEAR_SIGPENDING (current->sigpending = 0) # define CURRENT_SECONDS CURRENT_TIME -# define kernel_text_address(addr) is_kernel_text_address(addr) -extern int is_kernel_text_address(unsigned long addr); - #endif #if defined(__arch_um__) && (LINUX_VERSION_CODE < KERNEL_VERSION(2,4,20)) diff --git a/lustre/portals/libcfs/Makefile.in b/lustre/portals/libcfs/Makefile.in index 0967123..15fff12 100644 --- 
a/lustre/portals/libcfs/Makefile.in +++ b/lustre/portals/libcfs/Makefile.in @@ -1,4 +1,4 @@ MODULES = libcfs -libcfs-objs := debug.o lwt.o module.o proc.o tracefile.o +libcfs-objs := debug.o lwt.o module.o proc.o tracefile.o watchdog.o @INCLUDE_RULES@ diff --git a/lustre/portals/libcfs/debug.c b/lustre/portals/libcfs/debug.c index f571958..1e81801 100644 --- a/lustre/portals/libcfs/debug.c +++ b/lustre/portals/libcfs/debug.c @@ -85,14 +85,9 @@ static char debug_file_name[1024]; static int handled_panic; /* to avoid recursive calls to notifiers */ char portals_upcall[1024] = "/usr/lib/lustre/portals_upcall"; -int portals_do_debug_dumplog(void *arg) +void portals_debug_dumplog_internal(void *arg) { - void *journal_info; - - kportal_daemonize(""); - - reparent_to_init(); - journal_info = current->journal_info; + void *journal_info = current->journal_info; current->journal_info = NULL; snprintf(debug_file_name, sizeof(debug_file_path) - 1, @@ -101,6 +96,13 @@ int portals_do_debug_dumplog(void *arg) tracefile_dump_all_pages(debug_file_name); current->journal_info = journal_info; +} + +int portals_debug_dumplog_thread(void *arg) +{ + kportal_daemonize(""); + reparent_to_init(); + portals_debug_dumplog_internal(arg); wake_up(&debug_ctlwq); return 0; } @@ -117,7 +119,8 @@ void portals_debug_dumplog(void) set_current_state(TASK_INTERRUPTIBLE); add_wait_queue(&debug_ctlwq, &wait); - rc = kernel_thread(portals_do_debug_dumplog, (void *)(long)current->pid, + rc = kernel_thread(portals_debug_dumplog_thread, + (void *)(long)current->pid, CLONE_VM | CLONE_FS | CLONE_FILES); if (rc < 0) printk(KERN_ERR "LustreError: cannot start log dump thread: " @@ -293,93 +296,34 @@ char *portals_id2str(int nal, ptl_process_id_t id, char *str) } #ifdef __KERNEL__ -char stack_backtrace[LUSTRE_TRACE_SIZE]; -spinlock_t stack_backtrace_lock = SPIN_LOCK_UNLOCKED; -#if defined(__arch_um__) -char *portals_debug_dumpstack(void) +void portals_debug_dumpstack(struct task_struct *tsk) { +#if 
defined(__arch_um__) + if (tsk != NULL) + CWARN("stack dump for process %d requested; I'll wake up gdb.\n", + tsk->pid); asm("int $3"); - return "dump stack\n"; -} - -#elif defined(__i386__) - -#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0)) -extern int lookup_symbol(unsigned long address, char *buf, int buflen); -const char *kallsyms_lookup(unsigned long addr, - unsigned long *symbolsize, - unsigned long *offset, - char **modname, char *namebuf) -{ - int rc = lookup_symbol(addr, namebuf, 128); - if (rc == -ENOSYS) - return NULL; - return namebuf; -} +#elif defined(HAVE_SHOW_TASK) + /* this is exported by lustre kernel version 42 */ + extern void show_task(struct task_struct *); + + if (tsk == NULL) + tsk = current; + CWARN("showing stack for process %d\n", tsk->pid); + show_task(tsk); +#else + CWARN("can't show stack: kernel doesn't export show_task\n"); #endif - -char *portals_debug_dumpstack(void) -{ - unsigned long esp = current->thread.esp, addr; - unsigned long *stack = (unsigned long *)&esp; - char *buf = stack_backtrace, *pbuf = buf; - int size; - - /* User space on another CPU? 
*/ - if ((esp ^ (unsigned long)current) & (PAGE_MASK << 1)){ - buf[0] = '\0'; - goto out; - } - - size = sprintf(pbuf, " Call Trace: "); - pbuf += size; - while (((long) stack & (THREAD_SIZE - 1)) != 0) { - addr = *stack++; - if (kernel_text_address(addr)) { - const char *sym_name; - char *modname, buffer[128]; - unsigned long junk, offset; - - sym_name = kallsyms_lookup(addr, &junk, &offset, - &modname, buffer); - if (sym_name == NULL) { - if (buf + LUSTRE_TRACE_SIZE <= pbuf + 12) - break; - size = sprintf(pbuf, "[<%08lx>] ", addr); - } else { - if (buf + LUSTRE_TRACE_SIZE - /* fix length + sizeof('\0') */ - <= pbuf + strlen(buffer) + 28 + 1) - break; - size = sprintf(pbuf, "([<%08lx>] %s (0x%p)) ", - addr, buffer, stack - 1); - } - pbuf += size; - } - } -out: - return buf; -} - -#else /* !__arch_um__ && !__i386__ */ - -char *portals_debug_dumpstack(void) -{ - char *buf = stack_backtrace; - buf[0] = '\0'; - return buf; } -#endif /* __arch_um__ */ struct task_struct *portals_current(void) { CWARN("current task struct is %p\n", current); return current; } -EXPORT_SYMBOL(stack_backtrace_lock); EXPORT_SYMBOL(portals_debug_dumpstack); EXPORT_SYMBOL(portals_current); #endif /* __KERNEL__ */ diff --git a/lustre/portals/libcfs/watchdog.c b/lustre/portals/libcfs/watchdog.c new file mode 100644 index 0000000..6faebfb --- /dev/null +++ b/lustre/portals/libcfs/watchdog.c @@ -0,0 +1,388 @@ +/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=8:tabstop=8: + * + * Copyright (C) 2004 Cluster File Systems, Inc. + * Author: Jacob Berkman + * + * This file is part of Lustre, http://www.lustre.org. + * + * Lustre is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. 
+ * + * Lustre is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Lustre; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +#define DEBUG_SUBSYSTEM S_PORTALS + +#include +#include +#include + + + +struct lc_watchdog { + struct timer_list lcw_timer; /* kernel timer */ + struct list_head lcw_list; + struct timeval lcw_last_touched; + struct task_struct *lcw_task; + + void (*lcw_callback)(struct lc_watchdog *, + struct task_struct *, + void *data); + void *lcw_data; + + int lcw_pid; + int lcw_time; /* time until watchdog fires, in jiffies */ + + enum { + LC_WATCHDOG_DISABLED, + LC_WATCHDOG_ENABLED, + LC_WATCHDOG_EXPIRED + } lcw_state; +}; + +/* + * The dispatcher will complete lcw_start_completion when it starts, + * and lcw_stop_completion when it exits. + * Complete lcw_event_completion to signal timer callback dispatches. + */ +struct completion lcw_start_completion; +struct completion lcw_event_completion; +struct completion lcw_stop_completion; + +/* + * Set this and complete lcw_event_completion to stop the dispatcher. + */ +enum { + LCW_FLAG_STOP = 0 +}; +static unsigned long lcw_flags = 0; + +/* + * Number of outstanding watchdogs. + * When it hits 1, we start the dispatcher. + * When it hits 0, we stop the dispatcher. + */ +static __u32 lcw_refcount = 0; +static DECLARE_MUTEX(lcw_refcount_sem); + +/* + * List of timers that have fired that need their callbacks run by the + * dispatcher. 
+ */ +static spinlock_t lcw_pending_timers_lock = SPIN_LOCK_UNLOCKED; +static struct list_head lcw_pending_timers = \ + LIST_HEAD_INIT(lcw_pending_timers); + +static struct task_struct *lcw_lookup_task(struct lc_watchdog *lcw) +{ + struct task_struct *tsk; + unsigned long flags; + ENTRY; + + read_lock_irqsave(&tasklist_lock, flags); + tsk = find_task_by_pid(lcw->lcw_pid); + read_unlock_irqrestore(&tasklist_lock, flags); + if (!tsk) { + CWARN("Process %d was not found in the task list; " + "watchdog callback may be incomplete\n", lcw->lcw_pid); + } else if (tsk != lcw->lcw_task) { + tsk = NULL; + CWARN("The current process %d did not set the watchdog; " + "watchdog callback may be incomplete\n", lcw->lcw_pid); + } + + RETURN(tsk); +} + +static void lcw_cb(unsigned long data) +{ + struct lc_watchdog *lcw = (struct lc_watchdog *)data; + struct task_struct *tsk; + unsigned long flags; + + ENTRY; + + if (lcw->lcw_state != LC_WATCHDOG_ENABLED) { + EXIT; + return; + } + + lcw->lcw_state = LC_WATCHDOG_EXPIRED; + + CWARN("Watchdog triggered for pid %d: it was inactive for %dus\n", + lcw->lcw_pid, (lcw->lcw_time * 1000) / HZ); + + tsk = lcw_lookup_task(lcw); + if (tsk != NULL) + portals_debug_dumpstack(tsk); + + spin_lock_irqsave(&lcw_pending_timers_lock, flags); + if (list_empty(&lcw->lcw_list)) { + list_add(&lcw->lcw_list, &lcw_pending_timers); + complete(&lcw_event_completion); + } + spin_unlock_irqrestore(&lcw_pending_timers_lock, flags); + + EXIT; +} + +static int lcw_dispatch_main(void *data) +{ + int rc = 0; + unsigned long flags; + struct lc_watchdog *lcw; + struct task_struct *tsk; + + ENTRY; + + kportal_daemonize("lc_watchdogd"); + + SIGNAL_MASK_LOCK(current, flags); + sigfillset(&current->blocked); + RECALC_SIGPENDING; + SIGNAL_MASK_UNLOCK(current, flags); + + complete(&lcw_start_completion); + + while (1) { + wait_for_completion(&lcw_event_completion); + CDEBUG(D_INFO, "Watchdog got woken up...\n"); + if (test_bit(LCW_FLAG_STOP, &lcw_flags)) { + CDEBUG(D_INFO, 
"LCW_FLAG_STOP was set, shutting down...\n"); + + spin_lock_irqsave(&lcw_pending_timers_lock, flags); + rc = !list_empty(&lcw_pending_timers); + spin_unlock_irqrestore(&lcw_pending_timers_lock, flags); + if (rc) { + CERROR("pending timers list was not empty at " + "time of watchdog dispatch shutdown\n"); + } + break; + } + + spin_lock_irqsave(&lcw_pending_timers_lock, flags); + while (!list_empty(&lcw_pending_timers)) { + + lcw = list_entry(lcw_pending_timers.next, + struct lc_watchdog, + lcw_list); + list_del_init(&lcw->lcw_list); + spin_unlock_irqrestore(&lcw_pending_timers_lock, flags); + + CDEBUG(D_INFO, "found lcw for pid %d\n", lcw->lcw_pid); + + if (lcw->lcw_state != LC_WATCHDOG_DISABLED) { + /* + * sanity check the task against our + * watchdog + */ + tsk = lcw_lookup_task(lcw); + lcw->lcw_callback(lcw, tsk, lcw->lcw_data); + } + + spin_lock_irqsave(&lcw_pending_timers_lock, flags); + } + spin_unlock_irqrestore(&lcw_pending_timers_lock, flags); + } + + complete(&lcw_stop_completion); + + RETURN(rc); +} + +static void lcw_dispatch_start(void) +{ + int rc; + + ENTRY; + LASSERT(lcw_refcount == 1); + + init_completion(&lcw_stop_completion); + init_completion(&lcw_start_completion); + init_completion(&lcw_event_completion); + + CDEBUG(D_INFO, "starting dispatch thread\n"); + rc = kernel_thread(lcw_dispatch_main, NULL, 0); + if (rc < 0) { + CERROR("error spawning watchdog dispatch thread: %d\n", rc); + EXIT; + return; + } + wait_for_completion(&lcw_start_completion); + CDEBUG(D_INFO, "watchdog dispatcher initialization complete.\n"); + + EXIT; +} + +static void lcw_dispatch_stop(void) +{ + ENTRY; + LASSERT(lcw_refcount == 0); + + CDEBUG(D_INFO, "trying to stop watchdog dispatcher.\n"); + + set_bit(LCW_FLAG_STOP, &lcw_flags); + complete(&lcw_event_completion); + + wait_for_completion(&lcw_stop_completion); + + CDEBUG(D_INFO, "watchdog dispatcher has shut down.\n"); + + EXIT; +} + +struct lc_watchdog *lc_watchdog_add(int time, + void (*callback)(struct lc_watchdog 
*, + struct task_struct *, + void *), + void *data) +{ + struct lc_watchdog *lcw = NULL; + ENTRY; + + PORTAL_ALLOC(lcw, sizeof(*lcw)); + if (!lcw) { + CDEBUG(D_INFO, "Could not allocate new lc_watchdog\n"); + RETURN(ERR_PTR(-ENOMEM)); + } + + lcw->lcw_task = current; + lcw->lcw_pid = current->pid; + lcw->lcw_time = (time * HZ) / 1000; + lcw->lcw_callback = callback ? callback : lc_watchdog_dumplog; + lcw->lcw_data = data; + lcw->lcw_state = LC_WATCHDOG_DISABLED; + + INIT_LIST_HEAD(&lcw->lcw_list); + + lcw->lcw_timer.function = lcw_cb; + lcw->lcw_timer.data = (unsigned long)lcw; + lcw->lcw_timer.expires = jiffies + lcw->lcw_time; + init_timer(&lcw->lcw_timer); + + down(&lcw_refcount_sem); + if (++lcw_refcount == 1) + lcw_dispatch_start(); + up(&lcw_refcount_sem); + + /* Keep this working in case we enable them by default */ + if (lcw->lcw_state == LC_WATCHDOG_ENABLED) { + do_gettimeofday(&lcw->lcw_last_touched); + add_timer(&lcw->lcw_timer); + } + + RETURN(lcw); +} +EXPORT_SYMBOL(lc_watchdog_add); + +static long +timeval_sub(struct timeval *large, struct timeval *small) +{ + return (large->tv_sec - small->tv_sec) * 1000000 + + (large->tv_usec - small->tv_usec); +} + +static void lcw_update_time(struct lc_watchdog *lcw, const char *message) +{ + struct timeval newtime; + unsigned long timediff; + + do_gettimeofday(&newtime); + if (lcw->lcw_state == LC_WATCHDOG_EXPIRED) { + timediff = timeval_sub(&newtime, &lcw->lcw_last_touched); + CWARN("Expired watchdog for pid %d %s after %lu.%.4lus\n", + lcw->lcw_pid, + message, + timediff / 1000000, + (timediff % 1000000) / 100); + } + lcw->lcw_last_touched = newtime; +} + +void lc_watchdog_touch(struct lc_watchdog *lcw) +{ + unsigned long flags; + ENTRY; + LASSERT(lcw != NULL); + + spin_lock_irqsave(&lcw_pending_timers_lock, flags); + if (!list_empty(&lcw->lcw_list)) + list_del_init(&lcw->lcw_list); + spin_unlock_irqrestore(&lcw_pending_timers_lock, flags); + + lcw_update_time(lcw, "touched"); + lcw->lcw_state = 
LC_WATCHDOG_ENABLED; + + mod_timer(&lcw->lcw_timer, jiffies + lcw->lcw_time); + + EXIT; +} +EXPORT_SYMBOL(lc_watchdog_touch); + +void lc_watchdog_disable(struct lc_watchdog *lcw) +{ + unsigned long flags; + ENTRY; + LASSERT(lcw != NULL); + + spin_lock_irqsave(&lcw_pending_timers_lock, flags); + if (!list_empty(&lcw->lcw_list)) + list_del_init(&lcw->lcw_list); + spin_unlock_irqrestore(&lcw_pending_timers_lock, flags); + + lcw_update_time(lcw, "disabled"); + lcw->lcw_state = LC_WATCHDOG_DISABLED; + + EXIT; +} +EXPORT_SYMBOL(lc_watchdog_disable); + +void lc_watchdog_delete(struct lc_watchdog *lcw) +{ + unsigned long flags; + ENTRY; + LASSERT(lcw != NULL); + + del_timer(&lcw->lcw_timer); + + lcw_update_time(lcw, "deleted"); + + spin_lock_irqsave(&lcw_pending_timers_lock, flags); + if (!list_empty(&lcw->lcw_list)) + list_del_init(&lcw->lcw_list); + spin_unlock_irqrestore(&lcw_pending_timers_lock, flags); + + down(&lcw_refcount_sem); + if (--lcw_refcount == 0) + lcw_dispatch_stop(); + up(&lcw_refcount_sem); + + PORTAL_FREE(lcw, sizeof(*lcw)); + + EXIT; +} +EXPORT_SYMBOL(lc_watchdog_delete); + +/* + * Provided watchdog handlers + */ + +extern void portals_debug_dumplog_internal(void *arg); + +void lc_watchdog_dumplog(struct lc_watchdog *lcw, + struct task_struct *tsk, + void *data) +{ + tsk = tsk ? 
tsk : current; + portals_debug_dumplog_internal((void *)(long)tsk->pid); +} +EXPORT_SYMBOL(lc_watchdog_dumplog); diff --git a/lustre/ptlbd/server.c b/lustre/ptlbd/server.c index f25b811..b198284 100644 --- a/lustre/ptlbd/server.c +++ b/lustre/ptlbd/server.c @@ -54,7 +54,7 @@ static int ptlbd_sv_setup(struct obd_device *obd, obd_count len, void *buf) ptlbd->ptlbd_service = ptlrpc_init_svc(PTLBD_NBUFS, PTLBD_BUFSIZE, PTLBD_MAXREQSIZE, - PTLBD_REQUEST_PORTAL, PTLBD_REPLY_PORTAL, + PTLBD_REQUEST_PORTAL, PTLBD_REPLY_PORTAL, 30000, ptlbd_handle, "ptlbd_sv", obd->obd_proc_entry); diff --git a/lustre/ptlrpc/service.c b/lustre/ptlrpc/service.c index 94eb45d..6f0c7ea 100644 --- a/lustre/ptlrpc/service.c +++ b/lustre/ptlrpc/service.c @@ -126,6 +126,9 @@ ptlrpc_grow_req_bufs(struct ptlrpc_srv_ni *srv_ni) struct ptlrpc_request_buffer_desc *rqbd; int i; + CDEBUG(D_RPCTRACE, "%s: allocate %d new %d-byte reqbufs (%d/%d left)\n", + svc->srv_name, svc->srv_nbuf_per_group, svc->srv_buf_size, + srv_ni->sni_nrqbd_receiving, svc->srv_nbufs); for (i = 0; i < svc->srv_nbuf_per_group; i++) { rqbd = ptlrpc_alloc_rqbd(srv_ni); @@ -275,7 +278,7 @@ ptlrpc_server_post_idle_rqbds (struct ptlrpc_service *svc) struct ptlrpc_service * ptlrpc_init_svc(int nbufs, int bufsize, int max_req_size, - int req_portal, int rep_portal, + int req_portal, int rep_portal, int watchdog_timeout, svc_handler_t handler, char *name, struct proc_dir_entry *proc_entry) { @@ -306,6 +309,7 @@ ptlrpc_init_svc(int nbufs, int bufsize, int max_req_size, service->srv_buf_size = bufsize; service->srv_rep_portal = rep_portal; service->srv_req_portal = req_portal; + service->srv_watchdog_timeout = watchdog_timeout; service->srv_handler = handler; INIT_LIST_HEAD(&service->srv_request_queue); @@ -695,6 +699,7 @@ static int ptlrpc_main(void *arg) struct ptlrpc_svc_data *data = (struct ptlrpc_svc_data *)arg; struct ptlrpc_service *svc = data->svc; struct ptlrpc_thread *thread = data->thread; + struct lc_watchdog *watchdog; unsigned 
long flags; ENTRY; @@ -716,6 +721,9 @@ static int ptlrpc_main(void *arg) thread->t_flags = SVC_RUNNING; wake_up(&thread->t_ctl_waitq); + watchdog = lc_watchdog_add(svc->srv_watchdog_timeout, + LC_WATCHDOG_DEFAULT_CB, NULL); + spin_lock_irqsave(&svc->srv_lock, flags); svc->srv_nthreads++; spin_unlock_irqrestore(&svc->srv_lock, flags); @@ -728,6 +736,8 @@ static int ptlrpc_main(void *arg) struct l_wait_info lwi = LWI_TIMEOUT(svc->srv_rqbd_timeout, ptlrpc_retry_rqbds, svc); + lc_watchdog_disable(watchdog); + l_wait_event_exclusive (svc->srv_waitq, ((thread->t_flags & SVC_STOPPING) != 0 && svc->srv_n_difficult_replies == 0) || @@ -740,6 +750,8 @@ static int ptlrpc_main(void *arg) (svc->srv_nthreads - 1))), &lwi); + lc_watchdog_touch(watchdog); + ptlrpc_check_rqbd_pools(svc); if (!list_empty (&svc->srv_reply_queue)) @@ -770,6 +782,8 @@ static int ptlrpc_main(void *arg) spin_unlock_irqrestore(&svc->srv_lock, flags); + lc_watchdog_delete(watchdog); + CDEBUG(D_NET, "service thread exiting, process %d\n", current->pid); return 0; } diff --git a/lustre/scripts/lustrefs b/lustre/scripts/lustrefs index 976e8e7..8148b75 100644 --- a/lustre/scripts/lustrefs +++ b/lustre/scripts/lustrefs @@ -9,7 +9,7 @@ # Authors: Bill Nottingham # Miquel van Smoorenburg, # -# chkconfig: 345 25 75 +# chkconfig: 345 26 74 # description: Mounts and unmounts all Lustre mount points. 
# ### BEGIN INIT INFO diff --git a/lustre/utils/lfs.c b/lustre/utils/lfs.c index 0e49c43..e91b1b2 100644 --- a/lustre/utils/lfs.c +++ b/lustre/utils/lfs.c @@ -184,16 +184,53 @@ static int lfs_find(int argc, char **argv) static int lfs_getstripe(int argc, char **argv) { + struct option long_opts[] = { + {"quiet", 0, 0, 'q'}, + {"verbose", 0, 0, 'v'}, + {0, 0, 0, 0} + }; + char short_opts[] = "qv"; + int quiet, verbose, recursive, c, rc; struct obd_uuid *obduuid = NULL; - int rc; - if (argc != 2) - return CMD_HELP; + optind = 0; + quiet = verbose = recursive = 0; + while ((c = getopt_long(argc, argv, short_opts, + long_opts, NULL)) != -1) { + switch (c) { + case 'o': + if (obduuid) { + fprintf(stderr, + "error: %s: only one obduuid allowed", + argv[0]); + return CMD_HELP; + } + obduuid = (struct obd_uuid *)optarg; + break; + case 'q': + quiet++; + verbose = 0; + break; + case 'v': + verbose++; + quiet = 0; + break; + case '?': + return CMD_HELP; + break; + default: + fprintf(stderr, "error: %s: option '%s' unrecognized\n", + argv[0], argv[optind - 1]); + return CMD_HELP; + break; + } + } - optind = 1; + if (optind >= argc) + return CMD_HELP; do { - rc = llapi_find(argv[optind], obduuid, 0, 0, 0); + rc = llapi_find(argv[optind], obduuid, recursive,verbose,quiet); } while (++optind < argc && !rc); if (rc) -- 1.8.3.1