buggy GFP_KERNEL allocators
Manfred Spraul
manfreds en colorfullife.com
Mie Ene 26 18:31:23 CST 2000
Ben LaHaise wrote:
>
> Hello Rik & all,
>
> Here's a patch for two of the places that touch userspace after modifying
> current->state. Alan, could you include these in the next pre patch?
>
> -ben
write_chan (in drivers/char/n_tty.c) is another place where user space
is touched with current->state != TASK_RUNNING.
I've attached my "debug kernel" patch for 2.3; it detected these
TASK_RUNNING buglets. Perhaps you can use it.
--
Manfred
P.S.: I hope my previous patch didn't make it to the list; I
accidentally attached a 1 MB patch with .orig files and kernel
disassembly. Sorry if I wasn't fast enough to stop it.
------------ próxima parte ------------
// $Header$
// Kernel Version:
// VERSION = 2
// PATCHLEVEL = 3
// SUBLEVEL = 39
// EXTRAVERSION =
diff -u -r -N 2.3/arch/i386/kernel/irq.c build-2.3/arch/i386/kernel/irq.c
--- 2.3/arch/i386/kernel/irq.c Sat Jan 8 11:03:19 2000
+++ build-2.3/arch/i386/kernel/irq.c Wed Jan 12 18:02:17 2000
@@ -208,10 +208,21 @@
do_flush_tlb_local();
}
+extern void show_stack(unsigned long* stack);
+
+static void show_ipi(void* info)
+{
+ int cpu = smp_processor_id();
+
+ printk(KERN_EMERG "CPU %d: [irq %d bh %d]\n",
+ cpu, local_irq_count[cpu], local_bh_count[cpu]);
+ printk(KERN_EMERG "Stack dump:\n");
+ show_stack(NULL);
+
+}
+
static void show(char * str)
{
- int i;
- unsigned long *stack;
int cpu = smp_processor_id();
printk("\n%s, CPU %d:\n", str, cpu);
@@ -219,13 +230,14 @@
atomic_read(&global_irq_count), local_irq_count[0], local_irq_count[1]);
printk("bh: %d [%d %d]\n",
atomic_read(&global_bh_count), local_bh_count[0], local_bh_count[1]);
- stack = (unsigned long *) &stack;
- for (i = 40; i ; i--) {
- unsigned long x = *++stack;
- if (x > (unsigned long) &get_option && x < (unsigned long) &vsprintf) {
- printk("<[%08lx]> ", x);
- }
+ printk("Stack dump:\n");
+ show_stack(NULL);
+
+ if(hardirq_trylock(smp_processor_id())) {
+ smp_call_function(show_ipi, NULL, 1, 1);
+ hardirq_endlock(smp_processor_id());
}
+ printk("%s: spinning again.\n", str);
}
#define MAXCOUNT 100000000
diff -u -r -N 2.3/arch/i386/kernel/traps.c build-2.3/arch/i386/kernel/traps.c
--- 2.3/arch/i386/kernel/traps.c Tue Dec 21 09:57:59 1999
+++ build-2.3/arch/i386/kernel/traps.c Thu Jan 13 16:41:07 2000
@@ -124,19 +124,58 @@
/*
* These constants are for searching for possible module text
- * segments. VMALLOC_OFFSET comes from mm/vmalloc.c; MODULE_RANGE is
- * a guess of how much space is likely to be vmalloced.
+ * segments. MODULE_RANGE is a guess of how much space is likely
+ * to be vmalloced.
*/
-#define VMALLOC_OFFSET (8*1024*1024)
#define MODULE_RANGE (8*1024*1024)
+void show_stack(unsigned long * esp)
+{
+ unsigned long *stack, addr, module_start, module_end;
+ int i;
+
+ if(esp == NULL)
+ esp = (unsigned long*)&esp;
+ stack = esp;
+ for(i=0; i < kstack_depth_to_print; i++) {
+ if (((long) stack & (2*PAGE_SIZE-1)) == 0)
+ break;
+ if (i && ((i % 8) == 0))
+ printk("\n ");
+ printk("%08lx ", *stack++);
+ }
+ printk("\nCall Trace: ");
+ stack = esp;
+ i = 1;
+ module_start = VMALLOC_START;
+ module_end = module_start + MODULE_RANGE;
+ while (((long) stack & (2*PAGE_SIZE-1)) != 0) {
+ addr = *stack++;
+ /*
+ * If the address is either in the text segment of the
+ * kernel, or in the region which contains vmalloc'ed
+ * memory, it *may* be the address of a calling
+ * routine; if so, print it so that someone tracing
+ * down the cause of the crash will be able to figure
+ * out the call path that was taken.
+ */
+ if (((addr >= (unsigned long) &_stext) &&
+ (addr <= (unsigned long) &_etext)) ||
+ ((addr >= module_start) && (addr <= module_end))) {
+ if (i && ((i % 8) == 0))
+ printk("\n ");
+ printk("[<%08lx>] ", addr);
+ i++;
+ }
+ }
+}
+
static void show_registers(struct pt_regs *regs)
{
int i;
int in_kernel = 1;
unsigned long esp;
unsigned short ss;
- unsigned long *stack, addr, module_start, module_end;
esp = (unsigned long) (1+regs);
ss = __KERNEL_DS;
@@ -160,43 +199,38 @@
* time of the fault..
*/
if (in_kernel) {
+ pgd_t * pgdir;
+ pmd_t * pgmiddle;
+ pte_t * pgtable;
+
printk("\nStack: ");
- stack = (unsigned long *) esp;
- for(i=0; i < kstack_depth_to_print; i++) {
- if (((long) stack & 4095) == 0)
- break;
- if (i && ((i % 8) == 0))
- printk("\n ");
- printk("%08lx ", *stack++);
- }
- printk("\nCall Trace: ");
- stack = (unsigned long *) esp;
- i = 1;
- module_start = PAGE_OFFSET + (max_mapnr << PAGE_SHIFT);
- module_start = ((module_start + VMALLOC_OFFSET) & ~(VMALLOC_OFFSET-1));
- module_end = module_start + MODULE_RANGE;
- while (((long) stack & 4095) != 0) {
- addr = *stack++;
- /*
- * If the address is either in the text segment of the
- * kernel, or in the region which contains vmalloc'ed
- * memory, it *may* be the address of a calling
- * routine; if so, print it so that someone tracing
- * down the cause of the crash will be able to figure
- * out the call path that was taken.
- */
- if (((addr >= (unsigned long) &_stext) &&
- (addr <= (unsigned long) &_etext)) ||
- ((addr >= module_start) && (addr <= module_end))) {
- if (i && ((i % 8) == 0))
- printk("\n ");
- printk("[<%08lx>] ", addr);
- i++;
- }
- }
+ if(esp >= PAGE_OFFSET && esp < high_memory)
+ show_stack((unsigned long*)esp);
+ else
+ printk("Bad stack pointer.");
+
printk("\nCode: ");
- for(i=0;i<20;i++)
- printk("%02x ", ((unsigned char *)regs->eip)[i]);
+ if(regs->eip < PAGE_OFFSET)
+ goto bad;
+
+ pgdir = pgd_offset(current->mm,regs->eip);
+ if(pgd_none(*pgdir) || pgd_bad(*pgdir))
+ goto bad;
+
+ pgmiddle = pmd_offset(pgdir,regs->eip);
+ if(pmd_none(*pgmiddle) || pmd_bad(*pgmiddle))
+ goto bad;
+
+ pgtable = pte_offset(pgmiddle,regs->eip);
+ if(!pte_present(*pgtable))
+ {
+bad:
+ printk(" Bad EIP pointer.");
+ } else
+ {
+ for(i=0;i<20;i++)
+ printk("%02x ", ((unsigned char *)regs->eip)[i]);
+ }
}
printk("\n");
}
diff -u -r -N 2.3/drivers/char/n_tty.c build-2.3/drivers/char/n_tty.c
--- 2.3/drivers/char/n_tty.c Fri Oct 29 18:14:10 1999
+++ build-2.3/drivers/char/n_tty.c Wed Jan 12 19:10:16 2000
@@ -1094,7 +1094,9 @@
nr -= num;
if (nr == 0)
break;
+ set_current_state(TASK_RUNNING);
get_user(c, b);
+ set_current_state(TASK_INTERRUPTIBLE);
if (opost(c, tty) < 0)
break;
b++; nr--;
@@ -1102,7 +1104,9 @@
if (tty->driver.flush_chars)
tty->driver.flush_chars(tty);
} else {
+ set_current_state(TASK_RUNNING);
c = tty->driver.write(tty, 1, b, nr);
+ set_current_state(TASK_INTERRUPTIBLE);
if (c < 0) {
retval = c;
goto break_out;
diff -u -r -N 2.3/fs/namei.c build-2.3/fs/namei.c
--- 2.3/fs/namei.c Thu Jan 6 22:57:20 2000
+++ build-2.3/fs/namei.c Wed Jan 12 20:14:04 2000
@@ -780,16 +780,14 @@
char * tmp;
struct dentry * dentry;
- lock_kernel();
- error = -EPERM;
if (S_ISDIR(mode) || (!S_ISFIFO(mode) && !capable(CAP_MKNOD)))
- goto out;
+ return -EPERM;
tmp = getname(filename);
- error = PTR_ERR(tmp);
if (IS_ERR(tmp))
- goto out;
+ return PTR_ERR(tmp);
error = -EINVAL;
+ lock_kernel();
switch (mode & S_IFMT) {
case 0:
mode |= S_IFREG; /* fallthrough */
@@ -815,10 +813,9 @@
}
break;
}
+ unlock_kernel();
putname(tmp);
-out:
- unlock_kernel();
return error;
}
@@ -870,14 +867,14 @@
int error;
char * tmp;
- lock_kernel();
tmp = getname(pathname);
- error = PTR_ERR(tmp);
- if (!IS_ERR(tmp)) {
- error = do_mkdir(tmp,mode);
- putname(tmp);
- }
+ if(IS_ERR(tmp))
+ return PTR_ERR(tmp);
+ lock_kernel();
+ error = do_mkdir(tmp,mode);
unlock_kernel();
+ putname(tmp);
+
return error;
}
@@ -965,14 +962,15 @@
int error;
char * tmp;
- lock_kernel();
tmp = getname(pathname);
- error = PTR_ERR(tmp);
- if (!IS_ERR(tmp)) {
- error = do_rmdir(tmp);
- putname(tmp);
- }
+ if(IS_ERR(tmp))
+ return PTR_ERR(tmp);
+ lock_kernel();
+ error = do_rmdir(tmp);
unlock_kernel();
+
+ putname(tmp);
+
return error;
}
@@ -1018,14 +1016,14 @@
int error;
char * tmp;
- lock_kernel();
tmp = getname(pathname);
- error = PTR_ERR(tmp);
- if (!IS_ERR(tmp)) {
- error = do_unlink(tmp);
- putname(tmp);
- }
+ if(IS_ERR(tmp))
+ return PTR_ERR(tmp);
+ lock_kernel();
+ error = do_unlink(tmp);
unlock_kernel();
+ putname(tmp);
+
return error;
}
@@ -1068,21 +1066,20 @@
{
int error;
char * from;
+ char * to;
- lock_kernel();
from = getname(oldname);
- error = PTR_ERR(from);
- if (!IS_ERR(from)) {
- char * to;
- to = getname(newname);
- error = PTR_ERR(to);
- if (!IS_ERR(to)) {
- error = do_symlink(from,to);
- putname(to);
- }
- putname(from);
+ if(IS_ERR(from))
+ return PTR_ERR(from);
+ to = getname(newname);
+ error = PTR_ERR(to);
+ if (!IS_ERR(to)) {
+ lock_kernel();
+ error = do_symlink(from,to);
+ unlock_kernel();
+ putname(to);
}
- unlock_kernel();
+ putname(from);
return error;
}
@@ -1156,21 +1153,21 @@
{
int error;
char * from;
+ char * to;
- lock_kernel();
from = getname(oldname);
- error = PTR_ERR(from);
- if (!IS_ERR(from)) {
- char * to;
- to = getname(newname);
- error = PTR_ERR(to);
- if (!IS_ERR(to)) {
- error = do_link(from,to);
- putname(to);
- }
- putname(from);
+ if(IS_ERR(from))
+ return PTR_ERR(from);
+ to = getname(newname);
+ error = PTR_ERR(to);
+ if (!IS_ERR(to)) {
+ lock_kernel();
+ error = do_link(from,to);
+ unlock_kernel();
+ putname(to);
}
- unlock_kernel();
+ putname(from);
+
return error;
}
@@ -1327,21 +1324,20 @@
{
int error;
char * from;
+ char * to;
- lock_kernel();
from = getname(oldname);
- error = PTR_ERR(from);
- if (!IS_ERR(from)) {
- char * to;
- to = getname(newname);
- error = PTR_ERR(to);
- if (!IS_ERR(to)) {
- error = do_rename(from,to);
- putname(to);
- }
- putname(from);
+ if(IS_ERR(from))
+ return PTR_ERR(from);
+ to = getname(newname);
+ error = PTR_ERR(to);
+ if (!IS_ERR(to)) {
+ lock_kernel();
+ error = do_rename(from,to);
+ unlock_kernel();
+ putname(to);
}
- unlock_kernel();
+ putname(from);
return error;
}
diff -u -r -N 2.3/include/asm-i386/spinlock.h build-2.3/include/asm-i386/spinlock.h
--- 2.3/include/asm-i386/spinlock.h Tue Dec 21 09:58:09 1999
+++ build-2.3/include/asm-i386/spinlock.h Thu Jan 13 14:25:52 2000
@@ -4,6 +4,7 @@
#include <asm/atomic.h>
#include <asm/rwlock.h>
#include <asm/page.h>
+#include <asm/bitops.h>
extern int printk(const char * fmt, ...)
__attribute__ ((format (printf, 1, 2)));
@@ -68,6 +69,7 @@
#define spin_unlock_string \
"lock ; btrl $0,%0"
+extern atomic_t spinlock_count;
extern inline void spin_lock(spinlock_t *lock)
{
#if SPINLOCK_DEBUG
@@ -81,10 +83,12 @@
__asm__ __volatile__(
spin_lock_string
:"=m" (__dummy_lock(lock)));
+ atomic_inc(&spinlock_count);
}
extern inline void spin_unlock(spinlock_t *lock)
{
+ atomic_dec(&spinlock_count);
#if SPINLOCK_DEBUG
if (lock->magic != SPINLOCK_MAGIC)
BUG();
@@ -96,7 +100,16 @@
:"=m" (__dummy_lock(lock)));
}
-#define spin_trylock(lock) (!test_and_set_bit(0,(lock)))
+extern inline int spin_trylock(spinlock_t *lock)
+{
+ int old = test_and_set_bit(0,(lock));
+
+ if(old)
+ return 0;
+
+ atomic_inc(&spinlock_count);
+ return 1;
+}
/*
* Read-write spinlocks, allowing multiple readers
@@ -138,6 +151,7 @@
extern inline void read_lock(rwlock_t *rw)
{
+ atomic_inc(&spinlock_count);
#if SPINLOCK_DEBUG
if (rw->magic != RWLOCK_MAGIC)
BUG();
@@ -147,6 +161,7 @@
extern inline void write_lock(rwlock_t *rw)
{
+ atomic_inc(&spinlock_count);
#if SPINLOCK_DEBUG
if (rw->magic != RWLOCK_MAGIC)
BUG();
@@ -154,14 +169,25 @@
__build_write_lock(rw, "__write_lock_failed");
}
-#define read_unlock(rw) asm volatile("lock ; incl %0" :"=m" (__dummy_lock(&(rw)->lock)))
-#define write_unlock(rw) asm volatile("lock ; addl $" RW_LOCK_BIAS_STR ",%0":"=m" (__dummy_lock(&(rw)->lock)))
+#define read_unlock(rw) \
+ do { \
+ atomic_dec(&spinlock_count); \
+ asm volatile("lock ; incl %0" :"=m" (__dummy_lock(&(rw)->lock))); \
+ } while(0)
+
+#define write_unlock(rw) \
+ do { \
+ atomic_dec(&spinlock_count); \
+ asm volatile("lock ; addl $" RW_LOCK_BIAS_STR ",%0":"=m" (__dummy_lock(&(rw)->lock))); \
+ } while(0)
extern inline int write_trylock(rwlock_t *lock)
{
atomic_t *count = (atomic_t *)lock;
- if (atomic_sub_and_test(RW_LOCK_BIAS, count))
+ if (atomic_sub_and_test(RW_LOCK_BIAS, count)) {
+ atomic_inc(&spinlock_count);
return 1;
+ }
atomic_add(RW_LOCK_BIAS, count);
return 0;
}
diff -u -r -N 2.3/include/asm-i386/uaccess.h build-2.3/include/asm-i386/uaccess.h
--- 2.3/include/asm-i386/uaccess.h Tue Oct 12 14:09:55 1999
+++ build-2.3/include/asm-i386/uaccess.h Thu Jan 13 14:25:55 2000
@@ -8,6 +8,8 @@
#include <linux/sched.h>
#include <asm/page.h>
+extern void try_sched(void);
+
#define VERIFY_READ 0
#define VERIFY_WRITE 1
@@ -113,6 +115,7 @@
/* Careful: we have to cast the result to the type of the pointer for sign reasons */
#define get_user(x,ptr) \
({ int __ret_gu,__val_gu; \
+ try_sched(); \
switch(sizeof (*(ptr))) { \
case 1: __get_user_x(1,__ret_gu,__val_gu,ptr); break; \
case 2: __get_user_x(2,__ret_gu,__val_gu,ptr); break; \
@@ -137,6 +140,7 @@
#define put_user(x,ptr) \
({ int __ret_pu; \
+ try_sched(); \
switch(sizeof (*(ptr))) { \
case 1: __put_user_x(1,__ret_pu,(__typeof__(*(ptr)))(x),ptr); break; \
case 2: __put_user_x(2,__ret_pu,(__typeof__(*(ptr)))(x),ptr); break; \
@@ -544,6 +548,7 @@
static inline unsigned long
__constant_copy_to_user(void *to, const void *from, unsigned long n)
{
+ try_sched();
if (access_ok(VERIFY_WRITE, to, n))
__constant_copy_user(to,from,n);
return n;
@@ -554,6 +559,7 @@
{
if (access_ok(VERIFY_READ, from, n))
__constant_copy_user_zeroing(to,from,n);
+ try_sched();
return n;
}
@@ -561,12 +567,14 @@
__constant_copy_to_user_nocheck(void *to, const void *from, unsigned long n)
{
__constant_copy_user(to,from,n);
+ try_sched();
return n;
}
static inline unsigned long
__constant_copy_from_user_nocheck(void *to, const void *from, unsigned long n)
{
+ try_sched();
__constant_copy_user_zeroing(to,from,n);
return n;
}
diff -u -r -N 2.3/kernel/exit.c build-2.3/kernel/exit.c
--- 2.3/kernel/exit.c Tue Dec 7 10:43:36 1999
+++ build-2.3/kernel/exit.c Wed Jan 12 18:20:14 2000
@@ -481,6 +481,7 @@
if (!(options & WUNTRACED) && !(p->flags & PF_PTRACED))
continue;
read_unlock(&tasklist_lock);
+ current->state = TASK_RUNNING;
retval = ru ? getrusage(p, RUSAGE_BOTH, ru) : 0;
if (!retval && stat_addr)
retval = put_user((p->exit_code << 8) | 0x7f, stat_addr);
@@ -493,6 +494,7 @@
current->times.tms_cutime += p->times.tms_utime + p->times.tms_cutime;
current->times.tms_cstime += p->times.tms_stime + p->times.tms_cstime;
read_unlock(&tasklist_lock);
+ current->state = TASK_RUNNING;
retval = ru ? getrusage(p, RUSAGE_BOTH, ru) : 0;
if (!retval && stat_addr)
retval = put_user(p->exit_code, stat_addr);
diff -u -r -N 2.3/kernel/sched.c build-2.3/kernel/sched.c
--- 2.3/kernel/sched.c Sat Jan 8 11:03:45 2000
+++ build-2.3/kernel/sched.c Wed Jan 12 19:55:05 2000
@@ -1186,3 +1186,25 @@
atomic_inc(&init_mm.mm_count);
}
+atomic_t spinlock_count = ATOMIC_INIT(0);
+void show_stack(unsigned long* esp);
+
+void try_sched(void)
+{
+ int lock_count = atomic_read(&spinlock_count);
+ if(current->lock_depth >= 0)
+ lock_count--;
+ if(lock_count != 0) {
+ printk(KERN_EMERG "schedule() called while caller owned a spinlock.\n");
+ atomic_set(&spinlock_count,0);
+ show_stack(NULL);
+ return;
+ }
+ if(current->state == TASK_INTERRUPTIBLE ||
+ current->state == TASK_UNINTERRUPTIBLE) {
+ printk(KERN_EMERG "task running around with state != RUNNING.\n");
+ show_stack(NULL);
+ }
+ schedule_timeout(80);
+}
+
diff -u -r -N 2.3/mm/slab.c build-2.3/mm/slab.c
--- 2.3/mm/slab.c Sat Jan 8 11:03:45 2000
+++ build-2.3/mm/slab.c Wed Jan 12 17:35:52 2000
@@ -1667,6 +1667,8 @@
{
cache_sizes_t *csizep = cache_sizes;
+ if(flags & __GFP_WAIT)
+ try_sched();
for (; csizep->cs_size; csizep++) {
if (size > csizep->cs_size)
continue;
Más información sobre la lista de distribución Ayuda