[LWN Logo]
[LWN.net]
From:	 Benjamin LaHaise <bcrl@redhat.com>
To:	 Linux Kernel <linux-kernel@vger.kernel.org>
Subject: [RFC] syscalls-by-name (aka dynamic syscalls / vsyscalls)
Date:	 Thu, 17 Jan 2002 17:19:30 -0500

The attached patch implements syscalls-by-name in an initial and 
rather crud method -- the x86isms need tidying up, but the technique 
is usable across platforms.  Syscalls provided by name are made 
available to userland by means of a dynamic shared library that 
maps and provides a jumptable into a set of vsyscalls.  The vsyscalls 
can then enter the kernel through whatever mechanism (the only means 
implemented right now is via a new syscall, sys_dynamic_syscall) the 
kernel choses.  Version mismatch between the kernel and the library 
is prevented by means of a unique fingerprint the library checks on 
load against what the kernel provides via /dev/vsys.

/dev/vsys is a simple device node that allows user code to map the 
vsyscall area into their address space.  This is different from the 
fixed address that x86-64 uses to allow UML emulation of vsyscalls, 
and prevent other problems that could crop up with the intrusion of 
unexpected code into user address space.

For a quick example on how a new syscall is declared, I added the following 
lines into fs/aio.c in the aio patch:

	#include <asm/vsyscall.h>
 	... rest of file ...
	add_dynamic_syscall(sys_io_setup);
	add_dynamic_syscall(sys_io_destroy);
	add_dynamic_syscall(sys_io_submit);
	add_dynamic_syscall(sys_io_cancel);
	add_dynamic_syscall(sys_io_wait);
	add_dynamic_syscall(sys_io_getevents);

Similarly, vsyscalls can be added by means of adding an attribute to a 
function declaration that puts the code into the VSYSCALL segment.  This 
segment is linked at a virtual address just above the top of the default 
user stack, but is kept within the kernel image and mapped via /dev/vsys.

My current todos on the code are to clean it up so that the arch specific 
parts are better seperated, add symbol versions, and resurrect the 
sysenter / sysexit code as the means of entering the dynamic syscalls.  
There's also TSC based gettimeofday() code to finish off and add...

The patch is against 2.4.9-acsomething + aio + stuff, but should be pretty 
easy to merge into recent kernels.  If anyone has any comments or tweaks, 
I'd like to hear about them.  Cheers,

		-ben

:r ~/syscall-by-name-A4.diff
diff -urN linux.wrk.orig/Makefile linux.wrk.diff/Makefile
--- linux.wrk.orig/Makefile	Mon Jan 14 23:22:42 2002
+++ linux.wrk.diff/Makefile	Thu Jan 17 16:00:55 2002
@@ -236,7 +236,7 @@
 	drivers/sound/pndsperm.c \
 	drivers/sound/pndspini.c \
 	drivers/atm/fore200e_*_fw.c drivers/atm/.fore200e_*.fw \
-	.version .config* config.in config.old \
+	.uniquebytes .version .config* config.in config.old \
 	scripts/tkparse scripts/kconfig.tk scripts/kconfig.tmp \
 	scripts/lxdialog/*.o scripts/lxdialog/lxdialog \
 	.menuconfig.log \
@@ -283,6 +283,7 @@
 
 vmlinux: include/linux/version.h $(CONFIGURATION) init/main.o init/version.o linuxsubdirs
 	@$(MAKE) CFLAGS="$(CFLAGS) $(CFLAGS_KERNEL)" kallsyms
+	@$(MAKE) -C ulib
 
 .PHONY:	kallsyms
 
@@ -332,14 +333,17 @@
 
 linuxsubdirs: $(patsubst %, _dir_%, $(SUBDIRS))
 
-$(patsubst %, _dir_%, $(SUBDIRS)) : dummy include/linux/version.h include/config/MARKER
+$(patsubst %, _dir_%, $(SUBDIRS)) : dummy include/linux/compile.h include/config/MARKER
 	$(MAKE) CFLAGS="$(CFLAGS) $(CFLAGS_KERNEL)" -C $(patsubst _dir_%, %, $@)
 
 $(TOPDIR)/include/linux/version.h: include/linux/version.h
 $(TOPDIR)/include/linux/compile.h: include/linux/compile.h
 
 newversion:
-	. scripts/mkversion > .version
+	. scripts/mkversion > .tmpversion
+	@mv -f .tmpversion .version
+	rm -f .uniquebytes
+	dd if=/dev/urandom of=.uniquebytes bs=1 count=16
 
 include/linux/compile.h: $(CONFIGURATION) include/linux/version.h newversion
 	@echo -n \#define UTS_VERSION \"\#`cat .version` > .ver
@@ -357,9 +361,10 @@
 	   echo \#define LINUX_COMPILE_DOMAIN ; \
 	 fi >> .ver
 	@echo \#define LINUX_COMPILER \"`$(CC) $(CFLAGS) -v 2>&1 | tail -1`\" >> .ver
+	@echo \#define LINUX_UNIQUE_BYTES `hexdump -e '1/1 "0x%02x, "' .uniquebytes | sed -e 's/, \$$//g'` >>.ver
 	@mv -f .ver $@
 
-include/linux/version.h: ./Makefile include/linux/autoconf.h 
+include/linux/version.h: ./Makefile include/linux/autoconf.h
 	@echo \#define UTS_RELEASE \"$(KERNELRELEASE)\" > .ver
 	@echo \#define LINUX_VERSION_CODE `expr $(VERSION) \\* 65536 + $(PATCHLEVEL) \\* 256 + $(SUBLEVEL)` >> .ver
 	@echo '#define KERNEL_VERSION(a,b,c) (((a) << 16) + ((b) << 8) + (c))' >>.ver
@@ -453,6 +458,7 @@
 	rm -f $(CLEAN_FILES)
 	rm -rf $(CLEAN_DIRS)
 	$(MAKE) -C Documentation/DocBook clean
+	$(MAKE) -C ulib clean
 
 mrproper: clean archmrproper
 	find . \( -size 0 -o -name .depend \) -type f -print | xargs rm -f
diff -urN linux.wrk.orig/arch/i386/Makefile linux.wrk.diff/arch/i386/Makefile
--- linux.wrk.orig/arch/i386/Makefile	Mon Jan 14 23:22:41 2002
+++ linux.wrk.diff/arch/i386/Makefile	Thu Jan 17 15:13:37 2002
@@ -127,7 +127,7 @@
 zlilo: vmlinux
 	@$(MAKEBOOT) BOOTIMAGE=zImage zlilo
 
-tmp:
+tmp: ulib
 	@$(MAKEBOOT) BOOTIMAGE=bzImage zlilo
 bzlilo: vmlinux
 	@$(MAKEBOOT) BOOTIMAGE=bzImage zlilo
diff -urN linux.wrk.orig/arch/i386/kernel/Makefile linux.wrk.diff/arch/i386/kernel/Makefile
--- linux.wrk.orig/arch/i386/kernel/Makefile	Mon Jan 14 23:22:40 2002
+++ linux.wrk.diff/arch/i386/kernel/Makefile	Thu Jan 17 15:08:43 2002
@@ -23,8 +23,7 @@
 obj-y	:= process.o semaphore.o signal.o entry.o traps.o irq.o vm86.o \
 		ptrace.o i8259.o ioport.o ldt.o setup.o time.o sys_i386.o \
 		pci-dma.o i386_ksyms.o i387.o bluesmoke.o dmi_scan.o \
-		bootflag.o
-
+		bootflag.o vsysdata.o vunique.o dynamic_syscall.o
 
 ifdef CONFIG_PCI
 obj-y			+= pci-i386.o
@@ -53,3 +52,4 @@
 
 abi-machdep.o: $(abi-machdep-objs)
 	$(LD) -r -o $@ $(abi-machdep-objs)
+
diff -urN linux.wrk.orig/arch/i386/kernel/dynamic_syscall.c linux.wrk.diff/arch/i386/kernel/dynamic_syscall.c
--- linux.wrk.orig/arch/i386/kernel/dynamic_syscall.c	Wed Dec 31 19:00:00 1969
+++ linux.wrk.diff/arch/i386/kernel/dynamic_syscall.c	Thu Jan 17 13:39:07 2002
@@ -0,0 +1,66 @@
+/* arch/i386/kernel/dynamic_syscall.c
+ *	Entry code for dynamic syscalls on i386.
+ */
+#include <linux/kernel.h>
+#include <linux/compiler.h>
+#include <linux/errno.h>
+#include <linux/init.h>
+#include <asm/ptrace.h>
+#include <asm/uaccess.h>
+
+struct dummy_args {
+	long data[8];
+};
+
+extern struct vsyscall_entry {
+	long eip;
+	long (*call)(struct dummy_args args);
+} vsyscall_list_begin, vsyscall_list_end;
+
+long sys_dynamic_syscall(struct pt_regs regs) __attribute__((regparm(0)));
+
+long sys_dynamic_syscall(struct pt_regs regs)
+{
+	struct dummy_args dummy_args;
+	struct vsyscall_entry *ent = (void *)regs.edx;
+	void *args = (void *)regs.ecx;
+	long ret;
+
+	pr_debug("ent = %p  args = %p\n", ent, args);
+	pr_debug("eip = 0x%08lx\n", regs.eip);
+
+	/* The pointer must be aligned in the table. */
+	if (unlikely((long)ent & (sizeof(*ent) - 1))) {
+		pr_debug("unaligned\n");
+		goto err;
+	}
+
+	/* Bounds checking... */
+	if (unlikely(ent < &vsyscall_list_begin) ||
+	    unlikely(ent >= &vsyscall_list_end)) {
+		pr_debug("out of range %p <= %p < %p\n", 
+			&vsyscall_list_begin, ent,
+			&vsyscall_list_end);
+		goto err;
+	}
+	/* The entry should be valid now.  Verify that the caller's eip 
+	 * is correct.
+	 */
+	if (unlikely(ent->eip != regs.eip)) {
+		pr_debug("eip mismatch (0x%lx vs 0x%lx)\n", ent->eip, regs.eip);
+		goto err;
+	}
+
+	pr_debug("ent->call = %p\n", ent->call);
+
+	if (unlikely(copy_from_user(&dummy_args, args, sizeof(dummy_args))))
+		return -EFAULT;
+
+	ret = ent->call(dummy_args);
+
+	pr_debug("ret = 0x%08lx\n", ret);
+
+	return ret;
+err:
+	return -ENOSYS;
+}
diff -urN linux.wrk.orig/arch/i386/kernel/entry.S linux.wrk.diff/arch/i386/kernel/entry.S
--- linux.wrk.orig/arch/i386/kernel/entry.S	Wed Jan 16 00:34:52 2002
+++ linux.wrk.diff/arch/i386/kernel/entry.S	Wed Jan 16 17:02:37 2002
@@ -46,6 +46,7 @@
 #include <asm/segment.h>
 #define ASSEMBLY
 #include <asm/smp.h>
+#include <asm/unistd.h>
 
 EBX		= 0x00
 ECX		= 0x04
@@ -635,6 +636,11 @@
 # endif
 #endif
 
+	.rept __NR_sys_dynamic_syscall-(.-sys_call_table)/4
+		.long SYMBOL_NAME(sys_ni_syscall)
+	.endr
+	.long SYMBOL_NAME(sys_dynamic_syscall)
+
 	/* More patch resilient means of adding syscalls */
 	.rept 244-(.-sys_call_table)/4
 		.long SYMBOL_NAME(sys_ni_syscall)
diff -urN linux.wrk.orig/arch/i386/kernel/vsysdata.c linux.wrk.diff/arch/i386/kernel/vsysdata.c
--- linux.wrk.orig/arch/i386/kernel/vsysdata.c	Wed Dec 31 19:00:00 1969
+++ linux.wrk.diff/arch/i386/kernel/vsysdata.c	Wed Jan 16 00:35:39 2002
@@ -0,0 +1,11 @@
+/* vsysdata.c	- declarations for variables shared with the kernel
+ *
+ *	Items placed in .data.vsyscall have a kernel virtual address 
+ *	and are read/write from kernel space only.  The copy placed 
+ *	in .vsyscall_data are linked at a userspace address and are 
+ *	read only accessible from userland.
+ */
+#include <linux/vsyscall.h>
+
+union vsys_union user_vsys_cpudata[256] __attribute__((section(".vsyscall_data")));
+//asm(".globl vsys_cpudata ; bobbob = user_vsys_cpudata - vsyscall_text_begin ; vsys_cpudata = bobbob + VSYSCALL_text");
diff -urN linux.wrk.orig/arch/i386/kernel/vunique.S linux.wrk.diff/arch/i386/kernel/vunique.S
--- linux.wrk.orig/arch/i386/kernel/vunique.S	Wed Dec 31 19:00:00 1969
+++ linux.wrk.diff/arch/i386/kernel/vunique.S	Thu Jan 17 15:09:09 2002
@@ -0,0 +1,7 @@
+#include <linux/compile.h>
+
+	.section .first_vsyscall_text,"xa"
+	.globl	signature
+signature:
+	.byte	LINUX_UNIQUE_BYTES
+	.size	signature,.-signature
diff -urN linux.wrk.orig/arch/i386/vmlinux.lds linux.wrk.diff/arch/i386/vmlinux.lds
--- linux.wrk.orig/arch/i386/vmlinux.lds	Mon Jan 14 23:22:37 2002
+++ linux.wrk.diff/arch/i386/vmlinux.lds	Thu Jan 17 16:31:17 2002
@@ -13,6 +13,26 @@
         *(.fixup)
         *(.gnu.warning)
         } = 0x9090
+        /* Note: most of these declarations are in kernel/vsysdata.c,vsyscall.S.
+	 * We use two segments for the data liked at a kernel virtual address 
+	 * (.data.vsyscall) and user virtual address (.vsyscall_data).
+	 * .vsyscall_text is linked at a kernel virtual address
+	 */
+        . = ALIGN(4096);
+        VSYSCALL_text = .;
+        VSYSCALL 0xbfff0000 : AT ( VSYSCALL_text ) {
+                vsyscall_text_begin = .;
+                *(.first_vsyscall_text)
+                *(.vsyscall_text)
+                . = ALIGN(4096);
+                vsyscall_text_end = .;
+                *(.vsyscall_data)
+                . = ALIGN(4096);
+                vsyscall_data_end = .;
+        }
+        vsys_cpudata = user_vsys_cpudata - vsyscall_text_begin + VSYSCALL_text;
+        . = VSYSCALL_text + SIZEOF(VSYSCALL);
+        VSYSCALL_text_end = .;
   .text.lock : { *(.text.lock) } /* out-of-line lock text */
   .rodata : { *(.rodata) }
   .kstrtab : { *(.kstrtab) }
@@ -23,12 +43,21 @@
   __start___ksymtab = .; /* Kernel symbol table */
   __ksymtab : { *(__ksymtab) }
   __stop___ksymtab = .;
-   __start___kallsyms = .;	/* All kernel symbols */
+   __start___kallsyms = .; /* All kernel symbols */
    __kallsyms : { *(__kallsyms) }
    __stop___kallsyms = .;
   _etext = .; /* End of text section */
   .data : { /* Data */
         *(.data)
+        . = ALIGN(8);
+        vsyscall_list_begin = .;
+        *(.data.vsyscall_list)
+        vsyscall_list_end = .;
+        . = ALIGN(4096);
+        kernel_vsyscall_data_begin = .;
+        *(.data.vsyscall)
+        . = ALIGN(4096);
+        kernel_vsyscall_data_end = .;
         CONSTRUCTORS
         }
   _edata = .; /* End of data section */
@@ -70,4 +99,15 @@
   .stab.index 0 : { *(.stab.index) }
   .stab.indexstr 0 : { *(.stab.indexstr) }
   .comment 0 : { *(.comment) }
+/*
+#  VSYSCALL : {
+#	/ * vsyscall area *i /
+#	__vsyscall_begin = .;
+#	*(vsyscall_text)
+#	. = ALIGN(4096);
+#	*(.data.vsyscall)
+#	. = ALIGN(4096);
+#	__vsyscall_end = .;
+#  } >vsyscall_area
+*/
 }
diff -urN linux.wrk.orig/arch/i386/vmlinux.lds.S linux.wrk.diff/arch/i386/vmlinux.lds.S
--- linux.wrk.orig/arch/i386/vmlinux.lds.S	Mon Jan 14 23:22:37 2002
+++ linux.wrk.diff/arch/i386/vmlinux.lds.S	Thu Jan 17 15:09:34 2002
@@ -13,6 +13,28 @@
 	*(.fixup)
 	*(.gnu.warning)
 	} = 0x9090
+
+	/* Note: most of these declarations are in kernel/vsysdata.c,vsyscall.S.
+	 * We use two segments for the data liked at a kernel virtual address 
+	 * (.data.vsyscall) and user virtual address (.vsyscall_data).
+	 * .vsyscall_text is linked at a kernel virtual address
+	 */
+	. = ALIGN(4096);
+	VSYSCALL_text = .;
+	VSYSCALL 0xbfff0000 : AT ( VSYSCALL_text ) {
+		vsyscall_text_begin = .;
+		*(.first_vsyscall_text)
+		*(.vsyscall_text)
+		. = ALIGN(4096);
+		vsyscall_text_end = .;
+		*(.vsyscall_data)
+		. = ALIGN(4096);
+		vsyscall_data_end = .;
+	}
+	vsys_cpudata = user_vsys_cpudata - vsyscall_text_begin + VSYSCALL_text;
+	. = VSYSCALL_text + SIZEOF(VSYSCALL);
+	VSYSCALL_text_end = .;
+
   .text.lock : { *(.text.lock) }	/* out-of-line lock text */
   .rodata : { *(.rodata) }
   .kstrtab : { *(.kstrtab) }
@@ -34,6 +56,18 @@
 
   .data : {			/* Data */
 	*(.data)
+
+	. = ALIGN(8);
+	vsyscall_list_begin = .;
+	*(.data.vsyscall_list)
+	vsyscall_list_end = .;
+
+	. = ALIGN(4096);
+	kernel_vsyscall_data_begin = .;
+	*(.data.vsyscall)
+	. = ALIGN(4096);
+	kernel_vsyscall_data_end = .;
+
 	CONSTRUCTORS
 	}
 
@@ -83,4 +117,16 @@
   .stab.index 0 : { *(.stab.index) }
   .stab.indexstr 0 : { *(.stab.indexstr) }
   .comment 0 : { *(.comment) }
+
+/*
+#  VSYSCALL : {
+#	/ * vsyscall area *i /
+#	__vsyscall_begin = .;
+#	*(vsyscall_text)
+#	. = ALIGN(4096);
+#	*(.data.vsyscall)
+#	. = ALIGN(4096);
+#	__vsyscall_end = .;
+#  } >vsyscall_area
+*/
 }
diff -urN linux.wrk.orig/drivers/char/mem.c linux.wrk.diff/drivers/char/mem.c
--- linux.wrk.orig/drivers/char/mem.c	Mon Jan 14 23:22:37 2002
+++ linux.wrk.diff/drivers/char/mem.c	Thu Jan 17 13:47:22 2002
@@ -25,6 +25,7 @@
 #include <asm/uaccess.h>
 #include <asm/io.h>
 #include <asm/pgalloc.h>
+#include <asm/a.out.h>
 
 #ifdef CONFIG_SENSORS
 extern void sensors_init_all(void);
@@ -539,6 +540,77 @@
 	write:		write_full,
 };
 
+int vsys_mmap(struct file *file, struct vm_area_struct *vma)
+{
+	extern unsigned char vsyscall_text_begin, vsyscall_text_end, VSYSCALL_text[];
+	unsigned long offset = vma->vm_pgoff << PAGE_SHIFT;
+	unsigned long len = vma->vm_end - vma->vm_start;
+	unsigned long actual_len = &vsyscall_text_end - &vsyscall_text_begin;
+
+	if ((offset + len) > actual_len)
+		len = actual_len - offset;
+
+	pr_debug("len = 0x%lx, actual_len = 0x%lx\n", len, actual_len);
+
+	vma->vm_start = (unsigned long)&vsyscall_text_begin + offset;
+	vma->vm_end = vma->vm_start + len;
+	vma->vm_flags |= VM_RESERVED;
+
+	pr_debug("vm_start = 0x%lx, vm_end = 0x%lx\n",
+		vma->vm_start, vma->vm_end);
+	pr_debug("va=%p  pa=0x%lx\n",
+		VSYSCALL_text + offset,
+		__pa(VSYSCALL_text) + offset);
+
+	if (vma->vm_start < (unsigned long)&vsyscall_text_begin) {
+		pr_debug("vsys_mmap: start < begin\n");
+		return -EINVAL;
+	}
+
+	if (vma->vm_end < (unsigned long)&vsyscall_text_begin) {
+		pr_debug("vsys_mmap: end < begin\n");
+		return -EINVAL;
+	}
+
+	if (vma->vm_end > (unsigned long)&vsyscall_text_end) {
+		pr_debug("vsys_mmap: end(%lx) > text_end(%p)\n",
+			vma->vm_end, &vsyscall_text_end);
+		return -EINVAL;
+	}
+
+	if (vma->vm_start >= vma->vm_end) {
+		pr_debug("vsys_mmap: end\n");
+		return -EINVAL;
+	}
+
+	if (find_vma_intersection(current->mm, vma->vm_start, vma->vm_end)) {
+		pr_debug("vsyscall: mapping collision\n");
+		return -EINVAL;
+	}
+
+	if ((vma->vm_flags & (VM_SHARED | VM_WRITE)) == (VM_SHARED | VM_WRITE)) {
+		pr_debug("vsyscall: attempt to write to mapping\n");
+		return -EPERM;
+	}
+
+	if (remap_page_range(vma->vm_start,
+			     __pa(VSYSCALL_text) + offset,
+			     vma->vm_end-vma->vm_start,
+			     vma->vm_page_prot))
+		return -EAGAIN;
+
+	pr_debug("VSYSCALL_text(%p): %02x %02x %02x %02x\n",
+		VSYSCALL_text,
+		VSYSCALL_text[0], VSYSCALL_text[1],
+		VSYSCALL_text[2], VSYSCALL_text[3]);
+
+	return 0;
+}
+
+static struct file_operations vsys_fops = {
+	mmap:		vsys_mmap,
+};
+
 static int memory_open(struct inode * inode, struct file * filp)
 {
 	switch (MINOR(inode->i_rdev)) {
@@ -568,6 +640,9 @@
 		case 9:
 			filp->f_op = &urandom_fops;
 			break;
+		case 11:
+			filp->f_op = &vsys_fops;
+			break;
 		default:
 			return -ENXIO;
 	}
@@ -592,7 +667,8 @@
 	{5, "zero",    S_IRUGO | S_IWUGO,           &zero_fops},
 	{7, "full",    S_IRUGO | S_IWUGO,           &full_fops},
 	{8, "random",  S_IRUGO | S_IWUSR,           &random_fops},
-	{9, "urandom", S_IRUGO | S_IWUSR,           &urandom_fops}
+	{9, "urandom", S_IRUGO | S_IWUSR,           &urandom_fops},
+	{11,"vsys",    S_IRUGO | S_IWUSR,           &vsys_fops},
     };
     int i;
 
diff -urN linux.wrk.orig/include/asm-i386/a.out.h linux.wrk.diff/include/asm-i386/a.out.h
--- linux.wrk.orig/include/asm-i386/a.out.h	Fri Jun 16 14:33:06 1995
+++ linux.wrk.diff/include/asm-i386/a.out.h	Thu Jan 17 16:25:31 2002
@@ -19,7 +19,9 @@
 
 #ifdef __KERNEL__
 
-#define STACK_TOP	TASK_SIZE
+#define VSYSCALL_SIZE		0x10000		/* 64KB for vsyscalls */
+#define STACK_GUARD_SIZE	0x02000		/* 8KB guard area */
+#define STACK_TOP	(TASK_SIZE - VSYSCALL_SIZE - STACK_GUARD_SIZE)
 
 #endif
 
diff -urN linux.wrk.orig/include/asm-i386/unistd.h linux.wrk.diff/include/asm-i386/unistd.h
--- linux.wrk.orig/include/asm-i386/unistd.h	Wed Jan 16 00:34:52 2002
+++ linux.wrk.diff/include/asm-i386/unistd.h	Wed Jan 16 17:09:10 2002
@@ -228,6 +228,8 @@
 #define __NR_getdents64		220
 #define __NR_fcntl64		221
 
+#define __NR_sys_dynamic_syscall	230
+
 /* gap here for now */
 #define	__NR_io_setup		244
 #define	__NR_io_destroy		245
diff -urN linux.wrk.orig/include/asm-i386/vsyscall.h linux.wrk.diff/include/asm-i386/vsyscall.h
--- linux.wrk.orig/include/asm-i386/vsyscall.h	Wed Dec 31 19:00:00 1969
+++ linux.wrk.diff/include/asm-i386/vsyscall.h	Thu Jan 17 01:53:32 2002
@@ -0,0 +1,41 @@
+#ifndef __ASM__VSYSCALL_H
+#define __ASM__VSYSCALL_H
+/* include/asm-i386/vsyscall.h
+ *	Copyright 2002 Red Hat, Inc.
+ */
+#include <linux/linkage.h>
+#include <asm/unistd.h>
+
+/* We call sys_dynamic_syscall(long nr, void *args) using regparm(2)
+ * convention.  The .text.vsyscall section is mapped into userspace, 
+ * whereas .data.vsyscall_list is a kernel-only array of the vsyscalls 
+ * and the valid userspace address to call them from.  All vsyscalls 
+ * are called with C calling convention (ie args on the stack for x86).
+ *
+ * Note: the layout of .data.vsyscall_list must match the entries in
+ * dynamic_syscall.c.
+ */
+#define STRINGIFYa(x)	#x
+#define STRINGIFY(x)	STRINGIFYa(x)
+#define NR_dyn_sys	STRINGIFY(__NR_sys_dynamic_syscall)
+#define add_dynamic_syscall(name)				\
+	__asm__("						\n\
+	.section .vsyscall_text, \"xa\"				\n\
+	.globl v" #name "					\n\
+	v" #name ":						\n\
+		push %ecx					\n\
+		push %edx					\n\
+		movl $" NR_dyn_sys ",%eax			\n\
+		movl $2f,%edx					\n\
+		leal 12(%esp),%ecx				\n\
+		int $0x80					\n\
+	1:							\n\
+		popl %edx					\n\
+		popl %ecx					\n\
+		ret						\n\
+	.size v" #name ",.-v" #name "				\n\
+	.section .data.vsyscall_list,\"a\"			\n\
+	2:	.long	1b," #name "				\n\
+	.previous")
+
+#endif
diff -urN linux.wrk.orig/include/linux/vsyscall.h linux.wrk.diff/include/linux/vsyscall.h
--- linux.wrk.orig/include/linux/vsyscall.h	Wed Dec 31 19:00:00 1969
+++ linux.wrk.diff/include/linux/vsyscall.h	Wed Jan 16 00:35:39 2002
@@ -0,0 +1,20 @@
+#ifndef _LINUX__VSYSCALL_H
+#define _LINUX__VSYSCALL_H
+
+struct vsys_cpudata {
+	unsigned long	context_switches;
+	unsigned long	tv_sec;
+	unsigned long	tsc_low, tsc_high;
+	unsigned long	cycles_per_sec;
+};
+
+union vsys_union {
+	struct vsys_cpudata	data;
+	char			pad[128];
+};
+
+extern union vsys_union	vsys_cpudata[256] __attribute__((section(".data.vsyscall")));
+
+#define vsys_data(cpu)	(&vsys_cpudata[cpu].data)
+
+#endif /*ndef _LINUX__VSYSCALL_H*/
diff -urN linux.wrk.orig/ulib/Makefile linux.wrk.diff/ulib/Makefile
--- linux.wrk.orig/ulib/Makefile	Wed Dec 31 19:00:00 1969
+++ linux.wrk.diff/ulib/Makefile	Thu Jan 17 13:56:25 2002
@@ -0,0 +1,26 @@
+all: libkernel.so
+
+ASFLAGS=-D__KERNEL__ -D__ASSEMBLY__ -I../include -nostdlib -nostartfiles
+CFLAGS=-D__KERNEL__ -I../include -nostdlib -nostartfiles
+
+vsysaddr.S: ../System.map stub.S Makefile
+	rm -f vsysaddr.S
+	echo '#include "stub.S"' >vsysaddr.S
+	awk -- "/^bfff.* vsys_/ { print \"dynamic_syscall(\"\$$3 \",0x\" \$$1 \")\"; }" <../System.map >>vsysaddr.S
+
+vsysaddr.o: vsysaddr.S
+
+kso_init.o: ../include/linux/compile.h
+
+libkernel.so.1: vsysaddr.o kso_init.o
+	ld -E -symbolic -shared -soname=libkernel.so -o $@ vsysaddr.o kso_init.o
+
+libkernel.so: libkernel.so.1
+	ln -sf $< $@
+
+clean:
+	rm -f *.o libkernel.so myln libkernel.so.1 vsysaddr.S
+
+# test app
+myln: myln.c libkernel.so Makefile
+	cc -g -o myln myln.c -L. -lkernel
diff -urN linux.wrk.orig/ulib/kso_init.c linux.wrk.diff/ulib/kso_init.c
--- linux.wrk.orig/ulib/kso_init.c	Wed Dec 31 19:00:00 1969
+++ linux.wrk.diff/ulib/kso_init.c	Thu Jan 17 16:16:15 2002
@@ -0,0 +1,48 @@
+#include <linux/compile.h>
+#include <linux/types.h>
+#include <asm/unistd.h>
+#include <asm/fcntl.h>
+#include <asm/mman.h>
+#include <asm/a.out.h>
+
+char libkernel_enosys = 1;	/* the asm in stub.S depends on this */
+
+long _init(void)
+{
+	static char unique[] = { LINUX_UNIQUE_BYTES };
+	int errno;
+	long addr;
+	int fd;
+	int i;
+
+	_syscall6(int, mmap2, unsigned long, addr, unsigned long, len,
+        	  unsigned long, prot, unsigned long, flags,
+        	  unsigned long, fd, unsigned long, pgoff)
+	_syscall2(long, munmap, unsigned long, addr, size_t, len)
+	_syscall2(int, open, const char *, name, int, flags)
+	_syscall1(int, close, int, fd)
+
+	if (sizeof(unique) != 16)
+		return -1;
+
+	fd = open("/dev/vsys", O_RDONLY);
+	if (-1 == fd)
+		return -1;
+
+	addr = mmap2(0, VSYSCALL_SIZE, PROT_READ | PROT_EXEC, MAP_SHARED, fd, 0);
+	if (-1 == addr)
+		return -1;
+
+	close(fd);
+
+	for (i=0; i<sizeof(unique); i++)
+		if (unique[i] != ((char *)addr)[i]) {
+			munmap(addr, VSYSCALL_SIZE);
+			return -1;
+		}
+
+	/* okay, all the syscalls we provide are now good */
+	libkernel_enosys = 0;
+	return 0;
+}
+
diff -urN linux.wrk.orig/ulib/myln.c linux.wrk.diff/ulib/myln.c
--- linux.wrk.orig/ulib/myln.c	Wed Dec 31 19:00:00 1969
+++ linux.wrk.diff/ulib/myln.c	Thu Jan 17 09:18:59 2002
@@ -0,0 +1,25 @@
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <sys/mman.h>
+
+int main ()
+{
+	long ctx = 0;
+	extern long vsys_io_setup(long, long *);
+	unsigned char *bob = (void*)&vsys_io_setup;
+	long ret;
+	int i;
+	printf("%p\n", bob);
+	//printf("%p\n", mmap(0, 65536, PROT_READ | PROT_EXEC, MAP_SHARED,
+	//	open("/dev/vsys", O_RDONLY), 0));
+	//for (i=0; i<16; i++)
+	//	printf(" %02x\n", bob[i]);
+	//printf("\n");
+
+	ret = vsys_io_setup(100, &ctx);
+
+	printf("ret=%ld, ctx=0x%lx\n", ret, ctx);
+	return 0;
+}
diff -urN linux.wrk.orig/ulib/stub.S linux.wrk.diff/ulib/stub.S
--- linux.wrk.orig/ulib/stub.S	Wed Dec 31 19:00:00 1969
+++ linux.wrk.diff/ulib/stub.S	Thu Jan 17 16:25:38 2002
@@ -0,0 +1,18 @@
+/* stub.S */
+#include <asm/segment.h>
+#include <asm/errno.h>
+
+	.text
+
+#define dynamic_syscall(x,a) \
+	.globl	x				;\
+	.align 16				;\
+	x:					;\
+		cmpb $0,libkernel_enosys	;\
+		jne 1f				;\
+		ljmp $__USER_CS, $a		;\
+	1:					;\
+		movl	$-ENOSYS,%eax		;\
+		ret				;\
+	.size	 x,.-x
+
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/