[PATCH] mincore for i386, against 2.3.40-3

Chuck Lever cel en monkey.org
Vie Ene 21 15:14:01 CST 2000


hi all,

here's the mincore implementation again, with some improvements thanks to
Jeff Garzik and Chris Wing.

-  mincore no longer uses vmalloc
-  mincore can scan through arbitrarily large vm areas using only a fixed
amount of kernel memory
-  vm_operations_struct instantiations now use labeled initializers
("field: value"), which name only the structure fields needed; the
remaining fields are initialized to zero
-  mincore_area loop rewritten to hoist the "if" (does this vma provide an
incore method?) out of the inner per-page loop
-  default incore function added, which simply checks pte_present

comments welcome.

diff -ruN Linux-2.3.40-3/arch/i386/kernel/entry.S linux/arch/i386/kernel/entry.S
--- Linux-2.3.40-3/arch/i386/kernel/entry.S	Tue Jan 11 12:40:43 2000
+++ linux/arch/i386/kernel/entry.S	Wed Jan 19 18:14:46 2000
@@ -617,6 +617,7 @@
 	.long SYMBOL_NAME(sys_setgid)
 	.long SYMBOL_NAME(sys_setfsuid)		/* 215 */
 	.long SYMBOL_NAME(sys_setfsgid)
+	.long SYMBOL_NAME(sys_mincore)
 
 
 	/*
@@ -625,6 +626,6 @@
 	 * entries. Don't panic if you notice that this hasn't
 	 * been shrunk every time we add a new system call.
 	 */
-	.rept NR_syscalls-216
+	.rept NR_syscalls-217
 		.long SYMBOL_NAME(sys_ni_syscall)
 	.endr
diff -ruN Linux-2.3.40-3/drivers/sgi/char/graphics.c linux/drivers/sgi/char/graphics.c
--- Linux-2.3.40-3/drivers/sgi/char/graphics.c	Tue Jan 11 12:33:01 2000
+++ linux/drivers/sgi/char/graphics.c	Wed Jan 19 17:41:48 2000
@@ -254,15 +254,7 @@
  */
 
 static struct vm_operations_struct graphics_mmap = {
-	NULL,			/* no special mmap-open */
-	NULL,			/* no special mmap-close */
-	NULL,			/* no special mmap-unmap */
-	NULL,			/* no special mmap-protect */
-	NULL,			/* no special mmap-sync */
-	NULL,			/* no special mmap-advise */
-	sgi_graphics_nopage,	/* our magic no-page fault handler */
-	NULL,			/* no special mmap-wppage */
-	NULL			/* no special mmap-swapout */
+	nopage: sgi_graphics_nopage,	/* our magic no-page fault handler */
 };
 	
 int
diff -ruN Linux-2.3.40-3/drivers/sgi/char/shmiq.c linux/drivers/sgi/char/shmiq.c
--- Linux-2.3.40-3/drivers/sgi/char/shmiq.c	Tue Jan 11 12:33:01 2000
+++ linux/drivers/sgi/char/shmiq.c	Wed Jan 19 17:42:12 2000
@@ -296,15 +296,7 @@
 }
 
 static struct vm_operations_struct qcntl_mmap = {
-	NULL,			/* no special mmap-open */
-	NULL,			/* no special mmap-close */
-	NULL,			/* no special mmap-unmap */
-	NULL,			/* no special mmap-protect */
-	NULL,			/* no special mmap-sync */
-	NULL,			/* no special mmap-advise */
-	shmiq_nopage,		/* our magic no-page fault handler */
-	NULL,			/* no special mmap-wppage */
-	NULL			/* no special mmap-swapout */
+	nopage: shmiq_nopage,	/* our magic no-page fault handler */
 };
 
 static int
diff -ruN Linux-2.3.40-3/fs/ncpfs/mmap.c linux/fs/ncpfs/mmap.c
--- Linux-2.3.40-3/fs/ncpfs/mmap.c	Wed Nov 24 16:47:44 1999
+++ linux/fs/ncpfs/mmap.c	Wed Jan 19 17:42:47 2000
@@ -94,15 +94,7 @@
 
 struct vm_operations_struct ncp_file_mmap =
 {
-	NULL,			/* open */
-	NULL,			/* close */
-	NULL,			/* unmap */
-	NULL,			/* protect */
-	NULL,			/* sync */
-	NULL,			/* advise */
-	ncp_file_mmap_nopage,	/* nopage */
-	NULL,			/* wppage */
-	NULL			/* swapout */
+	nopage: ncp_file_mmap_nopage,
 };
 
 
diff -ruN Linux-2.3.40-3/include/asm-i386/unistd.h linux/include/asm-i386/unistd.h
--- Linux-2.3.40-3/include/asm-i386/unistd.h	Tue Jan 11 12:40:56 2000
+++ linux/include/asm-i386/unistd.h	Tue Jan 18 15:25:43 2000
@@ -221,6 +221,7 @@
 #define __NR_setgid32		214
 #define __NR_setfsuid32		215
 #define __NR_setfsgid32		216
+#define __NR_mincore		217
 
 /* user-visible error numbers are in the range -1 - -124: see <asm-i386/errno.h> */
 
diff -ruN Linux-2.3.40-3/include/linux/mm.h linux/include/linux/mm.h
--- Linux-2.3.40-3/include/linux/mm.h	Fri Jan 14 15:43:09 2000
+++ linux/include/linux/mm.h	Wed Jan 19 17:26:47 2000
@@ -106,6 +106,7 @@
 	void (*protect)(struct vm_area_struct *area, unsigned long, size_t, unsigned int newprot);
 	int (*sync)(struct vm_area_struct *area, unsigned long, size_t, unsigned int flags);
 	void (*advise)(struct vm_area_struct *area, unsigned long, size_t, unsigned int advise);
+	char (*incore)(struct vm_area_struct *area, unsigned long);
 	struct page * (*nopage)(struct vm_area_struct * area, unsigned long address, int write_access);
 	struct page * (*wppage)(struct vm_area_struct * area, unsigned long address, struct page * page);
 	int (*swapout)(struct page *, struct file *);
@@ -383,6 +384,7 @@
 extern int remap_page_range(unsigned long from, unsigned long to, unsigned long size, pgprot_t prot);
 extern int zeromap_page_range(unsigned long from, unsigned long size, pgprot_t prot);
 
+extern char default_incore(struct vm_area_struct * vma, unsigned long address);
 extern void vmtruncate(struct inode * inode, loff_t offset);
 extern int handle_mm_fault(struct task_struct *tsk,struct vm_area_struct *vma, unsigned long address, int write_access);
 extern int make_pages_present(unsigned long addr, unsigned long end);
@@ -446,6 +448,8 @@
 			size_t size, unsigned int flags);
 extern struct page *filemap_nopage(struct vm_area_struct * area,
 				    unsigned long address, int no_share);
+extern char filemap_incore(struct vm_area_struct * vma,
+				unsigned long pgoff);
 
 /*
  * GFP bitmasks..
diff -ruN Linux-2.3.40-3/ipc/shm.c linux/ipc/shm.c
--- Linux-2.3.40-3/ipc/shm.c	Fri Jan 14 15:43:09 2000
+++ linux/ipc/shm.c	Wed Jan 19 17:42:27 2000
@@ -64,6 +64,7 @@
 static void killseg (int shmid);
 static void shm_open (struct vm_area_struct *shmd);
 static void shm_close (struct vm_area_struct *shmd);
+static char shm_incore (struct vm_area_struct *shmd, unsigned long pgoff);
 static struct page * shm_nopage(struct vm_area_struct *, unsigned long, int);
 static int shm_swapout(struct page *, struct file *);
 #ifdef CONFIG_PROC_FS
@@ -584,15 +585,11 @@
  */
 
 static struct vm_operations_struct shm_vm_ops = {
-	shm_open,		/* open - callback for a new vm-area open */
-	shm_close,		/* close - callback for when the vm-area is released */
-	NULL,			/* no need to sync pages at unmap */
-	NULL,			/* protect */
-	NULL,			/* sync */
-	NULL,			/* advise */
-	shm_nopage,		/* nopage */
-	NULL,			/* wppage */
-	shm_swapout		/* swapout */
+	open: shm_open,
+	close: shm_close,
+	incore: shm_incore,
+	nopage: shm_nopage,
+	swapout: shm_swapout,
 };
 
 /* Insert shmd into the list shp->attaches */
@@ -829,6 +826,34 @@
 static int shm_swapout(struct page * page, struct file *file)
 {
 	return 0;
+}
+
+/*
+ * is page present?
+ */
+static char shm_incore(struct vm_area_struct * shmd, unsigned long pgoff)
+{
+	char result = 0;
+	pte_t pte;
+	struct shmid_kernel * shp = (struct shmid_kernel *) shmd->vm_private_data;
+
+	down(&shp->sem);
+	if(shp != shm_lock(shp->id))
+		BUG();
+
+	pte = SHM_ENTRY(shp, pgoff);
+
+	/*
+	 * the pte isn't present if the page is swapped, or if it hasn't
+	 * been touched yet.
+	 */
+	if (pte_present(pte))
+		result = 1;
+
+	shm_unlock(shp->id);
+	up(&shp->sem);
+
+	return result;
 }
 
 /*
diff -ruN Linux-2.3.40-3/mm/filemap.c linux/mm/filemap.c
--- Linux-2.3.40-3/mm/filemap.c	Fri Jan 14 15:43:09 2000
+++ linux/mm/filemap.c	Wed Jan 19 17:42:41 2000
@@ -700,6 +700,30 @@
 	return page;
 }
 
+/*
+ * This predicate returns 1 if the page is "in core," otherwise 0.
+ *
+ * Later we can get more picky about what "in core" means precisely,
+ * but for now, it simply checks to see if the page is in the page
+ * cache, and is up to date; i.e. that no page-in operation would be
+ * required at this time if an application were to map and access
+ * this page.
+ */
+char filemap_incore(struct vm_area_struct * vma, unsigned long pgoff)
+{
+	char result = 0;
+	struct address_space * as = &vma->vm_file->f_dentry->d_inode->i_data;
+	struct page * page, ** hash = page_hash(as, pgoff);
+
+	spin_lock(&pagecache_lock);
+	page = __find_page_nolock(as, pgoff, *hash);
+	if ((page) && (Page_Uptodate(page)))
+		result = 1;
+	spin_unlock(&pagecache_lock);
+
+	return result;
+}
+
 #if 0
 #define PROFILE_READAHEAD
 #define DEBUG_READAHEAD
@@ -1627,15 +1651,11 @@
  * backing-store for swapping..
  */
 static struct vm_operations_struct file_shared_mmap = {
-	NULL,			/* no special open */
-	NULL,			/* no special close */
-	filemap_unmap,		/* unmap - we need to sync the pages */
-	NULL,			/* no special protect */
-	filemap_sync,		/* sync */
-	NULL,			/* advise */
-	filemap_nopage,		/* nopage */
-	NULL,			/* wppage */
-	filemap_swapout		/* swapout */
+	unmap: filemap_unmap,	/* unmap - we need to sync the pages */
+	sync: filemap_sync,
+	incore: filemap_incore,
+	nopage: filemap_nopage,
+	swapout: filemap_swapout,
 };
 
 /*
@@ -1645,15 +1665,8 @@
  * know they can't ever get write permissions..)
  */
 static struct vm_operations_struct file_private_mmap = {
-	NULL,			/* open */
-	NULL,			/* close */
-	NULL,			/* unmap */
-	NULL,			/* protect */
-	NULL,			/* sync */
-	NULL,			/* advise */
-	filemap_nopage,		/* nopage */
-	NULL,			/* wppage */
-	NULL			/* swapout */
+	incore: filemap_incore,
+	nopage: filemap_nopage,
 };
 
 /* This is used for a general mmap of a disk file */
diff -ruN Linux-2.3.40-3/mm/memory.c linux/mm/memory.c
--- Linux-2.3.40-3/mm/memory.c	Tue Jan 11 12:40:57 2000
+++ linux/mm/memory.c	Wed Jan 19 17:24:45 2000
@@ -412,6 +412,23 @@
 	return NULL;
 }
 
+/*
+ * default vm operation for mincore() system call
+ */
+char default_incore(struct vm_area_struct * vma, unsigned long address)
+{
+	pgd_t *pgd = pgd_offset(vma->vm_mm, address);
+	pmd_t *pmd = pmd_offset(pgd, address);
+
+	if (pmd) {
+		pte_t * pte = pte_offset(pmd, address);
+		if (pte && pte_present(*pte))
+			return 1;
+	}
+
+	return 0;
+}
+
 /* 
  * Given a physical address, is there a useful struct page pointing to it?
  */
diff -ruN Linux-2.3.40-3/mm/mmap.c linux/mm/mmap.c
--- Linux-2.3.40-3/mm/mmap.c	Tue Dec 21 13:59:00 1999
+++ linux/mm/mmap.c	Wed Jan 19 17:55:57 2000
@@ -720,6 +720,142 @@
 }
 
 /*
+ * If mincore_area can't find an "incore" predicate for this vma, it
+ * falls back to default_incore(), which checks pte_present.  This covers
+ * device drivers and file systems that don't use the page cache.
+ */
+static long mincore_area(struct vm_area_struct * vma,
+	unsigned long start, unsigned long end, char * vec)
+{
+	int remaining, error, size, i;
+	char * tmp;
+
+	start = ((start - vma->vm_start) >> PAGE_SHIFT) + vma->vm_pgoff;
+	if (end > vma->vm_end)
+		end = vma->vm_end;
+	end = ((end - vma->vm_start) >> PAGE_SHIFT) + vma->vm_pgoff;
+
+	/* # of bytes in "vec" = # of pages */
+	size = end - start;
+
+	tmp = (char *) __get_free_page(GFP_KERNEL);
+	if (!tmp)
+		return -ENOMEM;
+
+	error = 0;
+	for (remaining = size, i = 0;
+	     remaining > 0;
+	     remaining -= PAGE_SIZE, i++) {
+		int j = 0;
+		int thispiece = (remaining < PAGE_SIZE) ? remaining : PAGE_SIZE;
+
+		if (vma->vm_ops && vma->vm_ops->incore) {
+			while (j < thispiece)
+				tmp[j++] = vma->vm_ops->incore(vma, start++);
+		} else {
+			while (j < thispiece)
+				tmp[j++] = default_incore(vma, start++);
+		}
+
+		if (copy_to_user(vec + PAGE_SIZE * i, tmp, thispiece)) {
+			error = -EFAULT;
+			break;
+		}
+	}
+
+	free_page((unsigned long) tmp);
+	return error;
+}
+
+/*
+ * The mincore(2) system call.
+ *
+ * mincore() returns the memory residency status of the pages in the
+ * current process's address space specified by [addr, addr + len).
+ * The status is returned in a vector of bytes.  The least significant
+ * bit of each byte is 1 if the referenced page is in memory, otherwise
+ * it is zero.
+ *
+ * Because the status of a page can change after mincore() checks it
+ * but before it returns to the application, the returned vector may
+ * contain stale information.  Only locked pages are guaranteed to
+ * remain in memory.
+ *
+ * return values:
+ *  zero    - success
+ *  -EFAULT - vec points to an illegal address
+ *  -EINVAL - addr is not a multiple of PAGE_SIZE, or the page-rounded
+ *		range [addr, addr + len) wraps around the address space
+ *  -ENOMEM - Addresses in the range [addr, addr + len) are
+ *		invalid for the address space of this process, or
+ *		specify one or more pages which are not currently
+ *		mapped
+ */
+asmlinkage long sys_mincore(unsigned long start, size_t len, char *vec)
+{
+	int index = 0;
+	unsigned long end;
+	struct vm_area_struct * vma;
+	int unmapped_error = 0;
+	int error = -EINVAL;
+
+	down(&current->mm->mmap_sem);
+
+	if (start & ~PAGE_MASK)
+		goto out;
+	len = (len + ~PAGE_MASK) & PAGE_MASK;
+	end = start + len;
+	if (end < start)
+		goto out;
+
+	error = 0;
+	if (end == start)
+		goto out;
+
+	/*
+	 * If the interval [start,end) covers some unmapped address
+	 * ranges, just ignore them, but return -ENOMEM at the end.
+	 */
+	vma = find_vma(current->mm, start);
+	for (;;) {
+		/* Still start < end. */
+		error = -ENOMEM;
+		if (!vma)
+			goto out;
+
+		/* Here start < vma->vm_end. */
+		if (start < vma->vm_start) {
+			unmapped_error = -ENOMEM;
+			start = vma->vm_start;
+		}
+
+		/* Here vma->vm_start <= start < vma->vm_end. */
+		if (end <= vma->vm_end) {
+			if (start < end) {
+				error = mincore_area(vma, start, end,
+							&vec[index]);
+				if (error)
+					goto out;
+			}
+			error = unmapped_error;
+			goto out;
+		}
+
+		/* Here vma->vm_start <= start < vma->vm_end < end. */
+		error = mincore_area(vma, start, vma->vm_end, &vec[index]);
+		if (error)
+			goto out;
+		index += (vma->vm_end - start) >> PAGE_CACHE_SHIFT;
+		start = vma->vm_end;
+		vma = vma->vm_next;
+	}
+
+out:
+	up(&current->mm->mmap_sem);
+	return error;
+}
+
+/*
  *  this is really a simplified "do_mmap".  it only handles
  *  anonymous maps.  eventually we may be able to do some
  *  brk-specific accounting here.
diff -ruN Linux-2.3.40-3/net/packet/af_packet.c linux/net/packet/af_packet.c
--- Linux-2.3.40-3/net/packet/af_packet.c	Fri Dec 31 15:04:42 1999
+++ linux/net/packet/af_packet.c	Wed Jan 19 17:42:55 2000
@@ -1537,15 +1537,8 @@
 }
 
 static struct vm_operations_struct packet_mmap_ops = {
-	packet_mm_open,		/* open */
-	packet_mm_close,	/* close */
-	NULL,			/* unmap */
-	NULL,			/* no special protect */
-	NULL,			/* sync */
-	NULL,			/* advise */
-	NULL,			/* nopage */
-	NULL,			/* wppage */
-	NULL			/* swapout */
+	open: packet_mm_open,
+	close: packet_mm_close,
 };
 
 static void free_pg_vec(unsigned long *pg_vec, unsigned order, unsigned len)

	- Chuck Lever
--
corporate:	<chuckl en netscape.com>
personal:	<chucklever en netscape.net> or <cel en monkey.org>

The Linux Scalability project:
	http://www.citi.umich.edu/projects/linux-scalability/


-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo en vger.rutgers.edu
Please read the FAQ at http://www.tux.org/lkml/



Más información sobre la lista de distribución Ayuda