[LWN Logo]
[LWN.net]
From:	 Daniel Phillips <phillips@bonn-fries.net>
To:	 lse-tech@lists.sourceforge.net
Subject: Re: [Lse-tech] [PATCH] Nonlinear kernel virtual to physical mapping for uml
Date:	 Mon, 8 Apr 2002 01:00:18 +0200

Here's an updated version of the config_nonlinear patch, with improved 
factoring of the primary functions:

   unsigned long logical_to_phys(unsigned long p)
   unsigned long phys_to_logical(unsigned long p)

   unsigned long ordinal_to_phys(unsigned long n)
   unsigned long phys_to_ordinal(unsigned long p)

which have the following simple definitions when config_nonlinear is not 
defined:

   #define logical_to_phys(p) (p)
   #define phys_to_logical(p) (p)
   #define ordinal_to_phys(n) ((n) << PAGE_SHIFT)
   #define phys_to_ordinal(p) ((p) >> PAGE_SHIFT)

and otherwise, each is a table translation.  I am trying the terminology 
'ordinal' in this patch as a substitute for 'pagenum', so we have logical and 
ordinal, related by PAGE_SHIFT.

I've adopted the traditional abbreviations 'phys' and 'virt' for physical and
virtual, which makes the patch smaller, but otherwise I don't like those 
names much - I'd rather write 'physical_to_virtual' than 'phys_to_virt'.  
That's just me though, I'd appreciate opinions.

I've had some time now to think about how this patch relates to config_numa, 
and my feeling is, it's orthogonal.  It is not a lower layer for numa.  The 
practical implication is that config_discontigmem can be removed entirely and 
config_numa can be written in the way that is best for numa support, and be 
freed from the necessity to support the non-numa discontig usage in certain 
architectures, for which the config_nonlinear approach is better in every way.

For 32 bit numa, config_nonlinear is needed in order to provide some 
zone_normal memory on every node, mapped to the local memory of that node.  
In this case, the combination of config_nonlinear and config_numa could be 
optimized to avoid extra table lookups in some cases.  At this point, I have 
not thought deeply about the details.

--- ../2.4.17.uml.clean/arch/um/config.in	Mon Mar 25 17:27:25 2002
+++ ./arch/um/config.in	Fri Apr  5 10:30:08 2002
@@ -36,6 +36,7 @@
 bool '2G/2G host address space split' CONFIG_HOST_2G_2G
 bool 'Symmetric multi-processing support' CONFIG_UML_SMP
 define_bool CONFIG_SMP $CONFIG_UML_SMP
+bool 'Support for nonlinear physical memory' CONFIG_NONLINEAR
 string 'Default main console channel initialization' CONFIG_CON_ZERO_CHAN \
 	"fd:0,fd:1"
 string 'Default console channel initialization' CONFIG_CON_CHAN "xterm"
--- ../2.4.17.uml.clean/arch/um/kernel/process_kern.c	Mon Mar 25 17:27:26 2002
+++ ./arch/um/kernel/process_kern.c	Sat Apr  6 11:07:55 2002
@@ -501,12 +501,8 @@
 #ifdef CONFIG_SMP
 	return("(Unknown)");
 #else
-	unsigned long addr;
-
-	if((addr = um_virt_to_phys(current, 
-				   current->mm->arg_start)) == 0xffffffff) 
-		return("(Unknown)");
-	else return((char *) addr);
+	unsigned long addr = um_virt_to_phys(current, current->mm->arg_start);
+	return addr == 0xffffffff? "(Unknown)": phys_to_virt(addr);
 #endif
 }
 
--- ../2.4.17.uml.clean/arch/um/kernel/um_arch.c	Mon Mar 25 17:27:27 2002
+++ ./arch/um/kernel/um_arch.c	Sat Apr  6 11:18:22 2002
@@ -270,6 +270,44 @@
 extern int jail;
 void *brk_start;
 
+#ifdef CONFIG_NONLINEAR
+unsigned long psection[MAX_SECTIONS];
+unsigned long vsection[MAX_SECTIONS];
+
+static int init_nonlinear(void)
+{
+	unsigned i, shift = SECTION_SHIFT - PAGE_SHIFT;
+
+	memset(psection, -1, sizeof(psection));
+	memset(vsection, -1, sizeof(vsection));
+	for (i = 0; i < MAX_SECTIONS; i++)
+		psection[i] = (i ^ (i >= 2)) << shift;
+
+	for (i = 0; i < MAX_SECTIONS; i++)
+		if (~psection[i] && psection[i] >> shift < MAX_SECTIONS)
+			vsection[psection[i] >> shift] = i << shift;
+
+	return 0;
+}
+
+static void show_nonlinear(void)
+{
+	int i;
+	printk(">>> logical section to physical num: ");
+	for (i = 0; i < MAX_SECTIONS; i++) printk("%lx ", psection[i]); printk("\n");
+	printk(">>> physical section to logical num: ");
+	for (i = 0; i < MAX_SECTIONS; i++) printk("%lx ", vsection[i]); printk("\n");
+}
+
+#else
+#  ifndef nil
+#    define nil do { } while (0)
+#  endif
+
+#define init_nonlinear() nil
+#define show_nonlinear() nil
+#endif
+
 int linux_main(int argc, char **argv)
 {
 	unsigned long start_pfn, end_pfn, bootmap_size;
@@ -294,6 +332,9 @@
 	/* Start physical memory at least 4M after the current brk */
 	uml_physmem = ROUND_4M(brk_start) + (1 << 22);
 
+	init_nonlinear();
+	show_nonlinear();
+
 	setup_machinename(system_utsname.machine);
 
 	argv1_begin = argv[1];
@@ -322,10 +363,10 @@
 	setup_memory();
 	high_physmem = uml_physmem + physmem_size;
 
-	start_pfn = PFN_UP(__pa(uml_physmem));
-	end_pfn = PFN_DOWN(__pa(high_physmem));
+	start_pfn = PFN_UP(virt_to_logical(uml_physmem));
+	end_pfn = PFN_DOWN(virt_to_logical(high_physmem));
 	bootmap_size = init_bootmem(start_pfn, end_pfn - start_pfn);
-	free_bootmem(__pa(uml_physmem) + bootmap_size, 
+	free_bootmem(virt_to_logical(uml_physmem) + bootmap_size, 
 		     high_physmem - uml_physmem - bootmap_size);
   	uml_postsetup();
 
--- ../2.4.17.uml.clean/drivers/char/mem.c	Fri Dec 21 18:41:54 2001
+++ ./drivers/char/mem.c	Sat Apr  6 11:43:52 2002
@@ -79,7 +79,7 @@
 	unsigned long end_mem;
 	ssize_t read;
 	
-	end_mem = __pa(high_memory);
+	end_mem = virt_to_logical(high_memory);
 	if (p >= end_mem)
 		return 0;
 	if (count > end_mem - p)
@@ -101,7 +101,7 @@
 		}
 	}
 #endif
-	if (copy_to_user(buf, __va(p), count))
+	if (copy_to_user(buf, logical_to_virt(p), count))
 		return -EFAULT;
 	read += count;
 	*ppos += read;
@@ -114,12 +114,12 @@
 	unsigned long p = *ppos;
 	unsigned long end_mem;
 
-	end_mem = __pa(high_memory);
+	end_mem = virt_to_logical(high_memory);
 	if (p >= end_mem)
 		return 0;
 	if (count > end_mem - p)
 		count = end_mem - p;
-	return do_write_mem(file, __va(p), p, buf, count, ppos);
+	return do_write_mem(file, logical_to_virt(p), p, buf, count, ppos);
 }
 
 #ifndef pgprot_noncached
@@ -178,7 +178,7 @@
 		  test_bit(X86_FEATURE_CENTAUR_MCR, &boot_cpu_data.x86_capability) )
 	  && addr >= __pa(high_memory);
 #else
-	return addr >= __pa(high_memory);
+	return addr >= virt_to_phys(high_memory); // bogosity alert!!
 #endif
 }
 
@@ -200,7 +200,7 @@
 	/*
 	 * Don't dump addresses that are not real memory to a core file.
 	 */
-	if (offset >= __pa(high_memory) || (file->f_flags & O_SYNC))
+	if (offset >= virt_to_logical(high_memory) || (file->f_flags & O_SYNC))
 		vma->vm_flags |= VM_IO;
 
 	if (remap_page_range(vma->vm_start, offset, vma->vm_end-vma->vm_start,
--- ../2.4.17.uml.clean/fs/proc/kcore.c	Fri Sep 14 01:04:43 2001
+++ ./fs/proc/kcore.c	Mon Apr  8 00:06:28 2002
@@ -50,7 +50,7 @@
 
 	memset(&dump, 0, sizeof(struct user));
 	dump.magic = CMAGIC;
-	dump.u_dsize = (virt_to_phys(high_memory) >> PAGE_SHIFT);
+	dump.u_dsize = (logical_to_virt(high_memory) >> PAGE_SHIFT);
 #if defined (__i386__) || defined(__x86_64__)
 	dump.start_code = PAGE_OFFSET;
 #endif
@@ -58,7 +58,7 @@
 	dump.start_data = PAGE_OFFSET;
 #endif
 
-	memsize = virt_to_phys(high_memory);
+	memsize = virt_to_logical(high_memory);
 	if (p >= memsize)
 		return 0;
 	if (count > memsize - p)
@@ -239,7 +239,7 @@
 	phdr->p_flags	= PF_R|PF_W|PF_X;
 	phdr->p_offset	= dataoff;
 	phdr->p_vaddr	= PAGE_OFFSET;
-	phdr->p_paddr	= __pa(PAGE_OFFSET);
+	phdr->p_paddr	= virt_to_phys(PAGE_OFFSET);
 	phdr->p_filesz	= phdr->p_memsz = ((unsigned long)high_memory - PAGE_OFFSET);
 	phdr->p_align	= PAGE_SIZE;
 
@@ -256,7 +256,7 @@
 		phdr->p_flags	= PF_R|PF_W|PF_X;
 		phdr->p_offset	= (size_t)m->addr - PAGE_OFFSET + dataoff;
 		phdr->p_vaddr	= (size_t)m->addr;
-		phdr->p_paddr	= __pa(m->addr);
+		phdr->p_paddr	= virt_to_phys(m->addr);
 		phdr->p_filesz	= phdr->p_memsz	= m->size;
 		phdr->p_align	= PAGE_SIZE;
 	}
@@ -382,7 +382,7 @@
 	}
 #endif
 	/* fill the remainder of the buffer from kernel VM space */
-	start = (unsigned long)__va(*fpos - elf_buflen);
+	start = (unsigned long) logical_to_virt(*fpos - elf_buflen);
 	if ((tsz = (PAGE_SIZE - (start & ~PAGE_MASK))) > buflen)
 		tsz = buflen;
 		
--- ../2.4.17.uml.clean/include/asm-i386/io.h	Wed Mar 27 23:31:33 2002
+++ ./include/asm-i386/io.h	Sat Apr  6 11:13:25 2002
@@ -60,20 +60,6 @@
 #endif
 
 /*
- * Change virtual addresses to physical addresses and vv.
- * These are pretty trivial
- */
-static inline unsigned long virt_to_phys(void *address)
-{
-	return __pa(address);
-}
-
-static inline void * phys_to_virt(unsigned long address)
-{
-	return __va(address);
-}
-
-/*
  * Change "struct page" to physical address.
  */
 #define page_to_phys(page)	((page - mem_map) << PAGE_SHIFT)
--- ../2.4.17.uml.clean/include/asm-um/page.h	Wed Mar 27 23:31:33 2002
+++ ./include/asm-um/page.h	Sat Apr  6 11:12:38 2002
@@ -29,30 +29,81 @@
 
 #endif /* __ASSEMBLY__ */
 
+#define __va_space (8*1024*1024)
+
 extern unsigned long uml_physmem;
+extern unsigned long max_mapnr;
 
-#define __va_space (8*1024*1024)
+static inline int VALID_PAGE(struct page *page)
+{
+	return page - mem_map < max_mapnr;
+}
 
-static inline unsigned long __pa(void *virt)
+static inline void *logical_to_virt(unsigned long p)
 {
-	return (unsigned long) (virt) - PAGE_OFFSET;
+	return (void *) ((unsigned long) p + PAGE_OFFSET);
 }
 
-static inline void *__va(unsigned long phys)
+static inline unsigned long virt_to_logical(void *v)
 {
-	return (void *) ((unsigned long) (phys) + PAGE_OFFSET);
+//	assert(it's a kernel virtual);
+	return (unsigned long) v - PAGE_OFFSET;
 }
 
-static inline struct page *virt_to_page(void *kaddr)
+#ifdef CONFIG_NONLINEAR
+#define MAX_SECTIONS (32)
+#define SECTION_SHIFT 20 /* 1 meg sections */
+#define SECTION_MASK (~(-1 << SECTION_SHIFT))
+
+extern unsigned long psection[MAX_SECTIONS];
+extern unsigned long vsection[MAX_SECTIONS];
+
+static inline unsigned long logical_to_phys(unsigned long p)
 {
-	return mem_map + (__pa(kaddr) >> PAGE_SHIFT);
+	return (psection[p >> SECTION_SHIFT] << PAGE_SHIFT) + (p & SECTION_MASK);
 }
 
-extern unsigned long max_mapnr;
+static inline unsigned long phys_to_logical(unsigned long p)
+{
+	return (vsection[p >> SECTION_SHIFT] << PAGE_SHIFT) + (p  & SECTION_MASK);
+}
 
-static inline int VALID_PAGE(struct page *page)
+static inline unsigned long ordinal_to_phys(unsigned long n)
 {
-	return page - mem_map < max_mapnr;
+	return ( psection[n >> (SECTION_SHIFT - PAGE_SHIFT)] +
+	    (n & (SECTION_MASK >> PAGE_SHIFT)) ) << PAGE_SHIFT;
+}
+
+static inline unsigned long phys_to_ordinal(unsigned long p)
+{
+	return vsection[p >> SECTION_SHIFT] + ((p & SECTION_MASK) >> PAGE_SHIFT);
+}
+
+#else
+#define logical_to_phys(p) (p)
+#define phys_to_logical(p) (p)
+#define ordinal_to_phys(n) ((n) << PAGE_SHIFT)
+#define phys_to_ordinal(p) ((p) >> PAGE_SHIFT)
+#endif /* CONFIG_NONLINEAR */
+
+static inline struct page *virt_to_page(void *v)
+{
+	return mem_map + (virt_to_logical(v) >> PAGE_SHIFT);
+}
+
+static inline struct page *phys_to_page(unsigned long p)
+{
+	return mem_map + phys_to_ordinal(p);
+}
+
+static inline unsigned long virt_to_phys(void *v)
+{
+	return logical_to_phys(virt_to_logical(v));
+}
+
+static inline void *phys_to_virt(unsigned long p)
+{
+	return logical_to_virt(phys_to_logical(p));
 }
 
 #endif
--- ../2.4.17.uml.clean/include/asm-um/pgtable.h	Mon Mar 25 17:27:28 2002
+++ ./include/asm-um/pgtable.h	Sat Apr  6 11:13:25 2002
@@ -197,9 +197,8 @@
 #define page_address(page) ({ if (!(page)->virtual) BUG(); (page)->virtual; })
 #define __page_address(page) ({ PAGE_OFFSET + (((page) - mem_map) << PAGE_SHIFT); })
 #define pages_to_mb(x) ((x) >> (20-PAGE_SHIFT))
-#define pte_page(x) \
-    (mem_map+((unsigned long)((__pa(pte_val(x)) >> PAGE_SHIFT))))
-#define pte_address(x) ((void *) ((unsigned long) pte_val(x) & PAGE_MASK))
+#define pte_page(x) (mem_map + phys_to_ordinal(pte_val(x)))
+#define pte_address(x) (phys_to_virt(pte_val(x) & PAGE_MASK))
 
 static inline pte_t pte_mknewprot(pte_t pte)
 {
@@ -313,18 +312,17 @@
  * and a page entry and page directory to the page they refer to.
  */
 
-#define mk_pte(page, pgprot) \
-({					\
-	pte_t __pte;                    \
-                                        \
-	pte_val(__pte) = ((unsigned long) __va((page-mem_map)*(unsigned long)PAGE_SIZE + pgprot_val(pgprot)));         \
-	if(pte_present(__pte)) pte_mknewprot(pte_mknewpage(__pte)); \
-	__pte;                          \
-})
+static inline pte_t mk_pte(struct page *page, pgprot_t pgprot) {
+	pte_t pte;
+	pte_val(pte) = ordinal_to_phys(page - mem_map) + pgprot_val(pgprot);
+	if (pte_present(pte))
+		pte_mknewprot(pte_mknewpage(pte));
+	return pte;
+}
 
 /* This takes a physical page address that is used by the remapping functions */
 #define mk_pte_phys(physpage, pgprot) \
-({ pte_t __pte; pte_val(__pte) = physpage + pgprot_val(pgprot); __pte; })
+({ pte_t __pte; pte_val(__pte) = physpage + pgprot_val(pgprot); BUG(); __pte; })
 
 static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
 {
--- ../2.4.17.uml.clean/include/linux/bootmem.h	Thu Nov 22 20:47:23 2001
+++ ./include/linux/bootmem.h	Sat Apr  6 11:18:19 2002
@@ -35,11 +35,11 @@
 extern void __init free_bootmem (unsigned long addr, unsigned long size);
 extern void * __init __alloc_bootmem (unsigned long size, unsigned long align, unsigned long goal);
 #define alloc_bootmem(x) \
-	__alloc_bootmem((x), SMP_CACHE_BYTES, __pa(MAX_DMA_ADDRESS))
+	__alloc_bootmem((x), SMP_CACHE_BYTES, virt_to_logical((void *) MAX_DMA_ADDRESS))
 #define alloc_bootmem_low(x) \
 	__alloc_bootmem((x), SMP_CACHE_BYTES, 0)
 #define alloc_bootmem_pages(x) \
-	__alloc_bootmem((x), PAGE_SIZE, __pa(MAX_DMA_ADDRESS))
+	__alloc_bootmem((x), PAGE_SIZE, virt_to_logical((void *) MAX_DMA_ADDRESS))
 #define alloc_bootmem_low_pages(x) \
 	__alloc_bootmem((x), PAGE_SIZE, 0)
 extern unsigned long __init free_all_bootmem (void);
@@ -50,9 +50,9 @@
 extern unsigned long __init free_all_bootmem_node (pg_data_t *pgdat);
 extern void * __init __alloc_bootmem_node (pg_data_t *pgdat, unsigned long size, unsigned long align, unsigned long goal);
 #define alloc_bootmem_node(pgdat, x) \
-	__alloc_bootmem_node((pgdat), (x), SMP_CACHE_BYTES, __pa(MAX_DMA_ADDRESS))
+	__alloc_bootmem_node((pgdat), (x), SMP_CACHE_BYTES, virt_to_logical((void *) MAX_DMA_ADDRESS))
 #define alloc_bootmem_pages_node(pgdat, x) \
-	__alloc_bootmem_node((pgdat), (x), PAGE_SIZE, __pa(MAX_DMA_ADDRESS))
+	__alloc_bootmem_node((pgdat), (x), PAGE_SIZE, virt_to_logical((void *) MAX_DMA_ADDRESS))
 #define alloc_bootmem_low_pages_node(pgdat, x) \
 	__alloc_bootmem_node((pgdat), (x), PAGE_SIZE, 0)
 
--- ../2.4.17.uml.clean/mm/bootmem.c	Fri Dec 21 18:42:04 2001
+++ ./mm/bootmem.c	Sat Apr  6 11:47:26 2002
@@ -51,7 +51,7 @@
 	pgdat_list = pgdat;
 
 	mapsize = (mapsize + (sizeof(long) - 1UL)) & ~(sizeof(long) - 1UL);
-	bdata->node_bootmem_map = phys_to_virt(mapstart << PAGE_SHIFT);
+	bdata->node_bootmem_map = logical_to_virt(mapstart << PAGE_SHIFT);
 	bdata->node_boot_start = (start << PAGE_SHIFT);
 	bdata->node_low_pfn = end;
 
@@ -214,12 +214,12 @@
 			areasize = 0;
 			// last_pos unchanged
 			bdata->last_offset = offset+size;
-			ret = phys_to_virt(bdata->last_pos*PAGE_SIZE + offset +
+			ret = logical_to_virt(bdata->last_pos*PAGE_SIZE + offset +
 						bdata->node_boot_start);
 		} else {
 			remaining_size = size - remaining_size;
 			areasize = (remaining_size+PAGE_SIZE-1)/PAGE_SIZE;
-			ret = phys_to_virt(bdata->last_pos*PAGE_SIZE + offset +
+			ret = logical_to_virt(bdata->last_pos*PAGE_SIZE + offset +
 						bdata->node_boot_start);
 			bdata->last_pos = start+areasize-1;
 			bdata->last_offset = remaining_size;
@@ -228,7 +228,7 @@
 	} else {
 		bdata->last_pos = start + areasize - 1;
 		bdata->last_offset = size & ~PAGE_MASK;
-		ret = phys_to_virt(start * PAGE_SIZE + bdata->node_boot_start);
+		ret = logical_to_virt(start * PAGE_SIZE + bdata->node_boot_start);
 	}
 	/*
 	 * Reserve the area now:
--- ../2.4.17.uml.clean/mm/memory.c	Fri Dec 21 18:42:05 2001
+++ ./mm/memory.c	Mon Apr  8 00:09:23 2002
@@ -806,7 +806,7 @@
 		pte_t oldpage;
 		oldpage = ptep_get_and_clear(pte);
 
-		page = virt_to_page(__va(phys_addr));
+		page = phys_to_page(phys_addr);
 		if ((!VALID_PAGE(page)) || PageReserved(page))
  			set_pte(pte, mk_pte_phys(phys_addr, prot));
 		forget_pte(oldpage);
--- ../2.4.17.uml.clean/mm/page_alloc.c	Tue Nov 20 01:35:40 2001
+++ ./mm/page_alloc.c	Sat Apr  6 11:42:43 2002
@@ -735,7 +735,7 @@
 			struct page *page = mem_map + offset + i;
 			page->zone = zone;
 			if (j != ZONE_HIGHMEM)
-				page->virtual = __va(zone_start_paddr);
+				page->virtual = logical_to_virt(zone_start_paddr);
 			zone_start_paddr += PAGE_SIZE;
 		}
 

_______________________________________________
Lse-tech mailing list
Lse-tech@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/lse-tech

Sponsored by http://www.ThinkGeek.com/