umount-root-6 (was Re: [PATCH] root-hopping for pre-2.3.41-3)

almesber en lrc.di.epfl.ch almesber en lrc.di.epfl.ch
Mie Ene 26 09:35:15 CST 2000


I wrote:
> Sigh, a new system call then :-(

Done ;-) I've attached the patch for pre-2.3.41-3 (also works with
2.3.40) and pivot_root.c . The complete package with examples is at
ftp://icaftp.epfl.ch/pub/people/almesber/misc/umount-root-6.tar.gz

Changes: /proc/mounts skips unreachable mount points, and quota is "dumb"
again. Umount flags are gone. Instead, there's a pivot_root system call
(with implicit global chroot), similar to what Peter suggested.

Concerning unreachable mount points: you can still get them by pivoting a
directory which is not a mount point itself, e.g. if you mount /iRoot and
/iRoot/opaque_office_beige, and then you pivot_root to
/iRoot/transp_light_yellow, /proc/mounts doesn't show
/iRoot/opaque_office_beige anymore. However, it "knows" that /iRoot is
special and shows it as /. If you  pivot_root /sth /sth/junk  again, all
that mess comes back under /junk/iRoot, as it should. So basically all
cases that make any sense in real life look pretty normal.

Cheers, Werner

-- 
  _________________________________________________________________________
 / Werner Almesberger, ICA, EPFL, CH       werner.almesberger en ica.epfl.ch /
/_IN_N_032__Tel_+41_21_693_6621__Fax_+41_21_693_6610_____________________/
------------ próxima parte ------------
--- linux/fs/dquot.c.orig	Tue Dec  7 02:09:28 1999
+++ linux/fs/dquot.c	Tue Jan 25 23:04:06 2000
@@ -797,17 +797,22 @@
 	return 0;
 }
 
-static void print_warning(struct dquot *dquot, int flag, char *fmtstr, ...)
+static void print_warning(struct dquot *dquot, int flag, const char *fmtstr)
 {
-	va_list args;
+	struct dentry *root;
+	char *path, *buffer;
 
 	if (!need_print_warning(dquot, flag))
 		return;
-	va_start(args, fmtstr);
-	vsprintf(quotamessage, fmtstr, args);
-	va_end(args);
+	root = dquot->dq_mnt->mnt_sb->s_root;
+	dget(root);
+	buffer = (char *) __get_free_page(GFP_KERNEL);
+	path = buffer ? d_path(root, buffer, PAGE_SIZE) : "?";
+	sprintf(quotamessage, fmtstr, path, quotatypes[dquot->dq_type]);
+	free_page((unsigned long) buffer);
 	tty_write_message(current->tty, quotamessage);
 	dquot->dq_flags |= flag;
+	dput(root);
 }
 
 static inline char ignore_hardlimit(struct dquot *dquot)
@@ -817,16 +822,13 @@
 
 static int check_idq(struct dquot *dquot, u_long inodes)
 {
-	short type = dquot->dq_type;
-
 	if (inodes <= 0 || dquot->dq_flags & DQ_FAKE)
 		return QUOTA_OK;
 
 	if (dquot->dq_ihardlimit &&
 	   (dquot->dq_curinodes + inodes) > dquot->dq_ihardlimit &&
             !ignore_hardlimit(dquot)) {
-		print_warning(dquot, DQ_INODES, "%s: write failed, %s file limit reached\n",
-			      dquot->dq_mnt->mnt_dirname, quotatypes[type]);
+		print_warning(dquot, DQ_INODES, "%s: write failed, %s file limit reached\n");
 		return NO_QUOTA;
 	}
 
@@ -834,17 +836,15 @@
 	   (dquot->dq_curinodes + inodes) > dquot->dq_isoftlimit &&
 	    dquot->dq_itime && CURRENT_TIME >= dquot->dq_itime &&
             !ignore_hardlimit(dquot)) {
-		print_warning(dquot, DQ_INODES, "%s: warning, %s file quota exceeded too long.\n",
-				dquot->dq_mnt->mnt_dirname, quotatypes[type]);
+		print_warning(dquot, DQ_INODES, "%s: warning, %s file quota exceeded too long.\n");
 		return NO_QUOTA;
 	}
 
 	if (dquot->dq_isoftlimit &&
 	   (dquot->dq_curinodes + inodes) > dquot->dq_isoftlimit &&
 	    dquot->dq_itime == 0) {
-		print_warning(dquot, 0, "%s: warning, %s file quota exceeded\n",
-				dquot->dq_mnt->mnt_dirname, quotatypes[type]);
-		dquot->dq_itime = CURRENT_TIME + dquot->dq_mnt->mnt_dquot.inode_expire[type];
+		print_warning(dquot, 0, "%s: warning, %s file quota exceeded\n");
+		dquot->dq_itime = CURRENT_TIME + dquot->dq_mnt->mnt_dquot.inode_expire[dquot->dq_type];
 	}
 
 	return QUOTA_OK;
@@ -852,8 +852,6 @@
 
 static int check_bdq(struct dquot *dquot, u_long blocks, char prealloc)
 {
-	short type = dquot->dq_type;
-
 	if (blocks <= 0 || dquot->dq_flags & DQ_FAKE)
 		return QUOTA_OK;
 
@@ -861,8 +859,7 @@
 	   (dquot->dq_curblocks + blocks) > dquot->dq_bhardlimit &&
             !ignore_hardlimit(dquot)) {
 		if (!prealloc)
-			print_warning(dquot, DQ_BLKS, "%s: write failed, %s disk limit reached.\n",
-					dquot->dq_mnt->mnt_dirname, quotatypes[type]);
+			print_warning(dquot, DQ_BLKS, "%s: write failed, %s disk limit reached.\n");
 		return NO_QUOTA;
 	}
 
@@ -871,8 +868,7 @@
 	    dquot->dq_btime && CURRENT_TIME >= dquot->dq_btime &&
             !ignore_hardlimit(dquot)) {
 		if (!prealloc)
-			print_warning(dquot, DQ_BLKS, "%s: write failed, %s disk quota exceeded too long.\n",
-					dquot->dq_mnt->mnt_dirname, quotatypes[type]);
+			print_warning(dquot, DQ_BLKS, "%s: write failed, %s disk quota exceeded too long.\n");
 		return NO_QUOTA;
 	}
 
@@ -880,9 +876,8 @@
 	   (dquot->dq_curblocks + blocks) > dquot->dq_bsoftlimit &&
 	    dquot->dq_btime == 0) {
 		if (!prealloc) {
-			print_warning(dquot, 0, "%s: warning, %s disk quota exceeded\n",
-					dquot->dq_mnt->mnt_dirname, quotatypes[type]);
-			dquot->dq_btime = CURRENT_TIME + dquot->dq_mnt->mnt_dquot.block_expire[type];
+			print_warning(dquot, 0, "%s: warning, %s disk quota exceeded\n");
+			dquot->dq_btime = CURRENT_TIME + dquot->dq_mnt->mnt_dquot.block_expire[dquot->dq_type];
 		}
 		else
 			/*
--- linux/fs/super.c.orig	Tue Jan 25 23:02:16 2000
+++ linux/fs/super.c	Wed Jan 26 06:32:57 2000
@@ -302,16 +302,24 @@
 
 int get_filesystem_info( char *buf )
 {
-	struct vfsmount *tmp = vfsmntlist;
+	struct vfsmount *tmp;
 	struct proc_fs_info *fs_infop;
 	struct proc_nfs_info *nfs_infop;
 	struct nfs_server *nfss;
 	int len = 0;
+	char *path,*buffer = (char *) __get_free_page(GFP_KERNEL);
 
-	while ( tmp && len < PAGE_SIZE - 160)
-	{
+	if (!buffer) return 0;
+	for (tmp = vfsmntlist; tmp && len < PAGE_SIZE - 160;
+	    tmp = tmp->mnt_next) {
+		path = __d_path(tmp->mnt_sb->s_root, buffer, PAGE_SIZE, 1);
+		if (!path && tmp->mnt_sb == current->fs->root->d_sb)
+			path = "/"; /* sub-mountpoint */
+		if (!path)
+			continue;
 		len += sprintf( buf + len, "%s %s %s %s",
-			tmp->mnt_devname, tmp->mnt_dirname, tmp->mnt_sb->s_type->name,
+			tmp->mnt_devname, path,
+			tmp->mnt_sb->s_type->name,
 			tmp->mnt_flags & MS_RDONLY ? "ro" : "rw" );
 		for (fs_infop = fs_info; fs_infop->flag; fs_infop++) {
 		  if (tmp->mnt_flags & fs_infop->flag) {
@@ -365,9 +373,9 @@
 				       nfss->hostname);
 		}
 		len += sprintf( buf + len, " 0 0\n" );
-		tmp = tmp->mnt_next;
 	}
 
+	free_page((unsigned long) buffer);
 	return len;
 }
 
@@ -681,7 +689,7 @@
 	shrink_dcache_sb(sb);
 	fsync_dev(dev);
 
-	if (dev==ROOT_DEV && !unmount_root) {
+	if (sb == current->fs->root->d_sb && !unmount_root) {
 		/*
 		 * Special case for "unmounting" root ...
 		 * we just try to remount it readonly.
@@ -1211,6 +1219,113 @@
 	}
 	panic("VFS: Unable to mount root fs on %s",
 		kdevname(ROOT_DEV));
+}
+
+
+static void chroot_fs_refs(struct dentry *old_root,
+    struct dentry *new_root)
+{
+	struct task_struct *p;
+
+	read_lock(&tasklist_lock);
+	for_each_task(p) {
+		if (!p->fs) continue;
+		if (p->fs->root == old_root) {
+			dput(old_root);
+			p->fs->root = dget(new_root);
+			printk(KERN_DEBUG "chroot_fs_refs: changed root of "
+			    "process %d\n",p->pid);
+		}
+		if (p->fs->pwd == old_root) {
+			dput(old_root);
+			p->fs->pwd = dget(new_root);
+			printk(KERN_DEBUG "chroot_fs_refs: changed cwd of "
+			    "process %d\n",p->pid);
+		}
+	}
+	read_unlock(&tasklist_lock);
+}
+
+
+/*
+ * Moves the current root to put_root, and sets root/cwd of all processes
+ * which had them on the old root to new_root.
+ *
+ * Note:
+ *  - we don't move root/cwd if they are not at the root (reason: if something
+ *    cared enough to change them, it's probably wrong to force them elsewhere)
+ *  - it's okay to pick a root that isn't the root of a file system, e.g.
+ *    /nfs/my_root where /nfs is the mount point. Better avoid creating
+ *    unreachable mount points this way, though.
+ */
+
+asmlinkage long sys_pivot_root(const char *new_root, const char *put_old)
+{
+	struct dentry *root = current->fs->root;
+	struct dentry *d_new_root, *d_put_old, *covered;
+	struct dentry *root_dev_root, *new_root_dev_root;
+	struct dentry *walk, *next;
+	int error;
+
+	if (!capable(CAP_SYS_ADMIN))
+		return -EPERM;
+
+	lock_kernel();
+	d_new_root = namei(new_root);
+	if (IS_ERR(d_new_root)) {
+		error = PTR_ERR(d_new_root);
+		goto out0;
+	}
+	d_put_old = namei(put_old);
+	if (IS_ERR(d_put_old)) {
+		error = PTR_ERR(d_put_old);
+		goto out1;
+	}
+	down(&mount_sem);
+	if (!d_new_root->d_inode || !d_put_old->d_inode) {
+		error = -ENOENT;
+		goto out2;
+	}
+	if (!S_ISDIR(d_new_root->d_inode->i_mode) ||
+	    !S_ISDIR(d_put_old->d_inode->i_mode)) {
+		error = -ENOTDIR;
+		goto out2;
+	}
+	error = -EBUSY;
+	if (d_new_root->d_sb == root->d_sb || d_put_old->d_sb == root->d_sb)
+		goto out2; /* loop */
+	if (d_put_old != d_put_old->d_covers)
+		goto out2; /* mount point is busy */
+	error = -EINVAL;
+	walk = d_put_old; /* make sure we can reach put_old from new_root */
+	for (;;) {
+		next = walk->d_covers->d_parent;
+		if (next == walk)
+			goto out2; /* hit a self-parented dentry without meeting new_root */
+		if (next == d_new_root)
+			break;
+		walk = next;
+	}
+
+	new_root_dev_root = d_new_root->d_sb->s_root;
+	covered = new_root_dev_root->d_covers;
+	new_root_dev_root->d_covers = new_root_dev_root; /* new root's fs no longer covers anything */
+	covered->d_mounts = covered; /* must be written before the reference is dropped */
+	dput(covered); /* release only after the last access to covered */
+
+	root_dev_root = root->d_sb->s_root;
+	root_dev_root->d_covers = dget(d_put_old); /* old root's fs now sits on put_old */
+	d_put_old->d_mounts = root_dev_root;
+	chroot_fs_refs(root,d_new_root);
+	error = 0;
+out2:
+	up(&mount_sem);
+	dput(d_put_old);
+out1:
+	dput(d_new_root);
+out0:
+	unlock_kernel();
+	return error;
 }
 
 
--- linux/fs/dcache.c.orig	Fri Jan  7 01:21:23 2000
+++ linux/fs/dcache.c	Wed Jan 26 03:30:24 2000
@@ -704,7 +704,8 @@
 /*
  * "buflen" should be PAGE_SIZE or more.
  */
-char * d_path(struct dentry *dentry, char *buffer, int buflen)
+char * __d_path(struct dentry *dentry, char *buffer, int buflen,
+    int no_unreachable)
 {
 	char * end = buffer+buflen;
 	char * retval;
@@ -730,8 +731,11 @@
 			break;
 		dentry = dentry->d_covers;
 		parent = dentry->d_parent;
-		if (dentry == parent)
+		if (dentry == parent) {
+			if (no_unreachable)
+				return NULL;
 			break;
+		}
 		namelen = dentry->d_name.len;
 		buflen -= namelen + 1;
 		if (buflen < 0)
@@ -743,6 +747,11 @@
 		dentry = parent;
 	}
 	return retval;
+}
+
+char * d_path(struct dentry *dentry, char *buffer, int buflen)
+{
+	return __d_path(dentry, buffer, buflen, 0);
 }
 
 /*
--- linux/include/linux/dcache.h.orig	Fri Jan  7 01:21:23 2000
+++ linux/include/linux/dcache.h	Wed Jan 26 03:30:51 2000
@@ -176,6 +176,7 @@
 
 /* write full pathname into buffer and return start of pathname */
 extern char * d_path(struct dentry *, char *, int);
+extern char * __d_path(struct dentry *, char *, int, int);
 
 /* Allocation counts.. */
 static __inline__ struct dentry * dget(struct dentry *dentry)
--- linux/include/asm-i386/unistd.h.orig	Tue Jan 11 03:15:58 2000
+++ linux/include/asm-i386/unistd.h	Wed Jan 26 00:01:55 2000
@@ -221,6 +221,7 @@
 #define __NR_setgid32		214
 #define __NR_setfsuid32		215
 #define __NR_setfsgid32		216
+#define __NR_pivot_root		217
 
 /* user-visible error numbers are in the range -1 - -124: see <asm-i386/errno.h> */
 
--- linux/arch/i386/kernel/entry.S.orig	Tue Jan 11 03:15:58 2000
+++ linux/arch/i386/kernel/entry.S	Wed Jan 26 00:11:53 2000
@@ -617,6 +617,7 @@
 	.long SYMBOL_NAME(sys_setgid)
 	.long SYMBOL_NAME(sys_setfsuid)		/* 215 */
 	.long SYMBOL_NAME(sys_setfsgid)
+	.long SYMBOL_NAME(sys_pivot_root)
 
 
 	/*
@@ -625,6 +626,6 @@
 	 * entries. Don't panic if you notice that this hasn't
 	 * been shrunk every time we add a new system call.
 	 */
-	.rept NR_syscalls-216
+	.rept NR_syscalls-217
 		.long SYMBOL_NAME(sys_ni_syscall)
 	.endr
------------ próxima parte ------------
/* pivot_root.c - Change the root file system */

/* Written 2000 by Werner Almesberger */


#include <stdio.h>
#include <linux/unistd.h>


_syscall2(int,pivot_root,const char *,new_root,const char *,put_old)


/*
 * Command-line front end for the pivot_root system call.
 *
 * Expects exactly two arguments: new_root and put_old, which are passed
 * unchanged to pivot_root(). Prints a usage line to stderr when the
 * argument count is wrong, or a perror() diagnostic when the call returns
 * a negative value.
 *
 * Returns 0 on success and 1 on either kind of failure.
 */
int main(int argc,const char **argv)
{
    int status = 0;

    if (argc == 3) {
	/* A negative return means the system call failed. */
	if (pivot_root(argv[1],argv[2]) < 0) {
	    perror("pivot_root");
	    status = 1;
	}
    } else {
	fprintf(stderr,"usage: %s new_root put_old\n",argv[0]);
	status = 1;
    }
    return status;
}


Más información sobre la lista de distribución Ayuda