From: Daniel Phillips <phillips@bonn-fries.net>
To: lse-tech@lists.sourceforge.net
Subject: Re: [Lse-tech] [PATCH] Nonlinear kernel virtual to physical mapping for uml
Date: Mon, 8 Apr 2002 01:00:18 +0200
Here's an updated version of the config_nonlinear patch, with improved
factoring of the primary functions:
unsigned long logical_to_phys(unsigned long p)
unsigned long phys_to_logical(unsigned long p)
unsigned long ordinal_to_phys(unsigned long n)
unsigned long phys_to_ordinal(unsigned long p)
which have the following simple definitions when config_nonlinear is not
defined:
#define logical_to_phys(p) (p)
#define phys_to_logical(p) (p)
#define ordinal_to_phys(n) ((n) << PAGE_SHIFT)
#define phys_to_ordinal(p) ((p) >> PAGE_SHIFT)
and otherwise, each is a table translation. I am trying the terminology
'ordinal' in this patch as a substitute for 'pagenum', so we have logical and
ordinal, related by PAGE_SHIFT.
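As a concrete illustration (this harness is not part of the patch), the
following user-space program builds the same swapped section tables that
init_nonlinear sets up below, and checks that the four translations
round-trip:

/* standalone sketch, not kernel code: exercises the table translations */
#include <assert.h>
#include <stdio.h>

#define PAGE_SHIFT	12
#define SECTION_SHIFT	20				/* 1 meg sections */
#define SECTION_MASK	(~(-1UL << SECTION_SHIFT))
#define MAX_SECTIONS	32

/* psection: logical section -> physical page number of section base;
   vsection: physical section -> logical page number of section base */
static unsigned long psection[MAX_SECTIONS], vsection[MAX_SECTIONS];

static unsigned long logical_to_phys(unsigned long p)
{
	return (psection[p >> SECTION_SHIFT] << PAGE_SHIFT) + (p & SECTION_MASK);
}

static unsigned long phys_to_logical(unsigned long p)
{
	return (vsection[p >> SECTION_SHIFT] << PAGE_SHIFT) + (p & SECTION_MASK);
}

static unsigned long ordinal_to_phys(unsigned long n)
{
	return (psection[n >> (SECTION_SHIFT - PAGE_SHIFT)] +
		(n & (SECTION_MASK >> PAGE_SHIFT))) << PAGE_SHIFT;
}

static unsigned long phys_to_ordinal(unsigned long p)
{
	return vsection[p >> SECTION_SHIFT] + ((p & SECTION_MASK) >> PAGE_SHIFT);
}

int main(void)
{
	unsigned long i, p, shift = SECTION_SHIFT - PAGE_SHIFT;

	/* same contrived mapping as init_nonlinear: swap sections 2<->3, 4<->5, ... */
	for (i = 0; i < MAX_SECTIONS; i++)
		psection[i] = (i ^ (i >= 2)) << shift;
	for (i = 0; i < MAX_SECTIONS; i++)
		vsection[psection[i] >> shift] = i << shift;

	/* logical 0x234567 is offset 0x34567 into section 2, which maps to
	   physical section 3, so it translates to physical 0x334567 */
	printf("%lx -> %lx\n", 0x234567UL, logical_to_phys(0x234567UL));

	/* every page must translate back to itself, by address and by ordinal */
	for (p = 0; p < (unsigned long) MAX_SECTIONS << SECTION_SHIFT; p += 1UL << PAGE_SHIFT) {
		assert(phys_to_logical(logical_to_phys(p)) == p);
		assert(phys_to_ordinal(ordinal_to_phys(p >> PAGE_SHIFT)) == p >> PAGE_SHIFT);
	}
	return 0;
}

Nothing in it depends on kernel headers, so it can be compiled and run
directly to sanity-check a candidate mapping.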
I've adopted the traditional abbreviations 'phys' and 'virt' for physical and
virtual, which makes the patch smaller, but otherwise I don't like those
names much - I'd rather write 'physical_to_virtual' than 'phys_to_virt'.
That's just me though; I'd appreciate opinions.
I've had some time now to think about how this patch relates to config_numa,
and my feeling is that it's orthogonal. It is not a lower layer for numa. The
practical implication is that config_discontigmem can be removed entirely and
config_numa can be written in the way that is best for numa support, freed
from the necessity of supporting the non-numa discontig usage on certain
architectures, for which the config_nonlinear approach is better in every way.
For 32-bit numa, config_nonlinear is needed in order to provide some
zone_normal memory on every node, mapped to the local memory of that node.
Here, the combination of config_nonlinear and config_numa could be optimized
to avoid extra table lookups in some cases; I have not yet thought deeply
about the details.
--- ../2.4.17.uml.clean/arch/um/config.in Mon Mar 25 17:27:25 2002
+++ ./arch/um/config.in Fri Apr 5 10:30:08 2002
@@ -36,6 +36,7 @@
bool '2G/2G host address space split' CONFIG_HOST_2G_2G
bool 'Symmetric multi-processing support' CONFIG_UML_SMP
define_bool CONFIG_SMP $CONFIG_UML_SMP
+bool 'Support for nonlinear physical memory' CONFIG_NONLINEAR
string 'Default main console channel initialization' CONFIG_CON_ZERO_CHAN \
"fd:0,fd:1"
string 'Default console channel initialization' CONFIG_CON_CHAN "xterm"
--- ../2.4.17.uml.clean/arch/um/kernel/process_kern.c Mon Mar 25 17:27:26 2002
+++ ./arch/um/kernel/process_kern.c Sat Apr 6 11:07:55 2002
@@ -501,12 +501,8 @@
#ifdef CONFIG_SMP
return("(Unknown)");
#else
- unsigned long addr;
-
- if((addr = um_virt_to_phys(current,
- current->mm->arg_start)) == 0xffffffff)
- return("(Unknown)");
- else return((char *) addr);
+ unsigned long addr = um_virt_to_phys(current, current->mm->arg_start);
+ return addr == 0xffffffff ? "(Unknown)" : phys_to_virt(addr);
#endif
}
--- ../2.4.17.uml.clean/arch/um/kernel/um_arch.c Mon Mar 25 17:27:27 2002
+++ ./arch/um/kernel/um_arch.c Sat Apr 6 11:18:22 2002
@@ -270,6 +270,44 @@
extern int jail;
void *brk_start;
+#ifdef CONFIG_NONLINEAR
+unsigned long psection[MAX_SECTIONS];
+unsigned long vsection[MAX_SECTIONS];
+
+static int init_nonlinear(void)
+{
+ unsigned i, shift = SECTION_SHIFT - PAGE_SHIFT;
+
+ memset(psection, -1, sizeof(psection));
+ memset(vsection, -1, sizeof(vsection));
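+ /* test mapping: sections 0 and 1 identity, then swap adjacent pairs (2<->3, 4<->5, ...) */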
+ for (i = 0; i < MAX_SECTIONS; i++)
+ psection[i] = (i ^ (i >= 2)) << shift;
+
+ for (i = 0; i < MAX_SECTIONS; i++)
+ if (~psection[i] && psection[i] >> shift < MAX_SECTIONS)
+ vsection[psection[i] >> shift] = i << shift;
+
+ return 0;
+}
+
+static void show_nonlinear(void)
+{
+ int i;
+ printk(">>> logical section to physical num: ");
+ for (i = 0; i < MAX_SECTIONS; i++) printk("%lx ", psection[i]);
+ printk("\n");
+ printk(">>> physical section to logical num: ");
+ for (i = 0; i < MAX_SECTIONS; i++) printk("%lx ", vsection[i]);
+ printk("\n");
+}
+
+#else
+# ifndef nil
+# define nil do { } while (0)
+# endif
+
+#define init_nonlinear() nil
+#define show_nonlinear() nil
+#endif
+
int linux_main(int argc, char **argv)
{
unsigned long start_pfn, end_pfn, bootmap_size;
@@ -294,6 +332,9 @@
/* Start physical memory at least 4M after the current brk */
uml_physmem = ROUND_4M(brk_start) + (1 << 22);
+ init_nonlinear();
+ show_nonlinear();
+
setup_machinename(system_utsname.machine);
argv1_begin = argv[1];
@@ -322,10 +363,10 @@
setup_memory();
high_physmem = uml_physmem + physmem_size;
- start_pfn = PFN_UP(__pa(uml_physmem));
- end_pfn = PFN_DOWN(__pa(high_physmem));
+ start_pfn = PFN_UP(virt_to_logical(uml_physmem));
+ end_pfn = PFN_DOWN(virt_to_logical(high_physmem));
bootmap_size = init_bootmem(start_pfn, end_pfn - start_pfn);
- free_bootmem(__pa(uml_physmem) + bootmap_size,
+ free_bootmem(virt_to_logical(uml_physmem) + bootmap_size,
high_physmem - uml_physmem - bootmap_size);
uml_postsetup();
--- ../2.4.17.uml.clean/drivers/char/mem.c Fri Dec 21 18:41:54 2001
+++ ./drivers/char/mem.c Sat Apr 6 11:43:52 2002
@@ -79,7 +79,7 @@
unsigned long end_mem;
ssize_t read;
- end_mem = __pa(high_memory);
+ end_mem = virt_to_logical(high_memory);
if (p >= end_mem)
return 0;
if (count > end_mem - p)
@@ -101,7 +101,7 @@
}
}
#endif
- if (copy_to_user(buf, __va(p), count))
+ if (copy_to_user(buf, logical_to_virt(p), count))
return -EFAULT;
read += count;
*ppos += read;
@@ -114,12 +114,12 @@
unsigned long p = *ppos;
unsigned long end_mem;
- end_mem = __pa(high_memory);
+ end_mem = virt_to_logical(high_memory);
if (p >= end_mem)
return 0;
if (count > end_mem - p)
count = end_mem - p;
- return do_write_mem(file, __va(p), p, buf, count, ppos);
+ return do_write_mem(file, logical_to_virt(p), p, buf, count, ppos);
}
#ifndef pgprot_noncached
@@ -178,7 +178,7 @@
test_bit(X86_FEATURE_CENTAUR_MCR, &boot_cpu_data.x86_capability) )
&& addr >= __pa(high_memory);
#else
- return addr >= __pa(high_memory);
+ return addr >= virt_to_phys(high_memory); // bogosity alert!!
#endif
}
@@ -200,7 +200,7 @@
/*
* Don't dump addresses that are not real memory to a core file.
*/
- if (offset >= __pa(high_memory) || (file->f_flags & O_SYNC))
+ if (offset >= virt_to_logical(high_memory) || (file->f_flags & O_SYNC))
vma->vm_flags |= VM_IO;
if (remap_page_range(vma->vm_start, offset, vma->vm_end-vma->vm_start,
--- ../2.4.17.uml.clean/fs/proc/kcore.c Fri Sep 14 01:04:43 2001
+++ ./fs/proc/kcore.c Mon Apr 8 00:06:28 2002
@@ -50,7 +50,7 @@
memset(&dump, 0, sizeof(struct user));
dump.magic = CMAGIC;
- dump.u_dsize = (virt_to_phys(high_memory) >> PAGE_SHIFT);
+ dump.u_dsize = (virt_to_logical(high_memory) >> PAGE_SHIFT);
#if defined (__i386__) || defined(__x86_64__)
dump.start_code = PAGE_OFFSET;
#endif
@@ -58,7 +58,7 @@
dump.start_data = PAGE_OFFSET;
#endif
- memsize = virt_to_phys(high_memory);
+ memsize = virt_to_logical(high_memory);
if (p >= memsize)
return 0;
if (count > memsize - p)
@@ -239,7 +239,7 @@
phdr->p_flags = PF_R|PF_W|PF_X;
phdr->p_offset = dataoff;
phdr->p_vaddr = PAGE_OFFSET;
- phdr->p_paddr = __pa(PAGE_OFFSET);
+ phdr->p_paddr = virt_to_phys((void *) PAGE_OFFSET);
phdr->p_filesz = phdr->p_memsz = ((unsigned long)high_memory - PAGE_OFFSET);
phdr->p_align = PAGE_SIZE;
@@ -256,7 +256,7 @@
phdr->p_flags = PF_R|PF_W|PF_X;
phdr->p_offset = (size_t)m->addr - PAGE_OFFSET + dataoff;
phdr->p_vaddr = (size_t)m->addr;
- phdr->p_paddr = __pa(m->addr);
+ phdr->p_paddr = virt_to_phys(m->addr);
phdr->p_filesz = phdr->p_memsz = m->size;
phdr->p_align = PAGE_SIZE;
}
@@ -382,7 +382,7 @@
}
#endif
/* fill the remainder of the buffer from kernel VM space */
- start = (unsigned long)__va(*fpos - elf_buflen);
+ start = (unsigned long) logical_to_virt(*fpos - elf_buflen);
if ((tsz = (PAGE_SIZE - (start & ~PAGE_MASK))) > buflen)
tsz = buflen;
--- ../2.4.17.uml.clean/include/asm-i386/io.h Wed Mar 27 23:31:33 2002
+++ ./include/asm-i386/io.h Sat Apr 6 11:13:25 2002
@@ -60,20 +60,6 @@
#endif
/*
- * Change virtual addresses to physical addresses and vv.
- * These are pretty trivial
- */
-static inline unsigned long virt_to_phys(void *address)
-{
- return __pa(address);
-}
-
-static inline void * phys_to_virt(unsigned long address)
-{
- return __va(address);
-}
-
-/*
* Change "struct page" to physical address.
*/
#define page_to_phys(page) ((page - mem_map) << PAGE_SHIFT)
--- ../2.4.17.uml.clean/include/asm-um/page.h Wed Mar 27 23:31:33 2002
+++ ./include/asm-um/page.h Sat Apr 6 11:12:38 2002
@@ -29,30 +29,81 @@
#endif /* __ASSEMBLY__ */
+#define __va_space (8*1024*1024)
+
extern unsigned long uml_physmem;
+extern unsigned long max_mapnr;
-#define __va_space (8*1024*1024)
+static inline int VALID_PAGE(struct page *page)
+{
+ return page - mem_map < max_mapnr;
+}
-static inline unsigned long __pa(void *virt)
+static inline void *logical_to_virt(unsigned long p)
{
- return (unsigned long) (virt) - PAGE_OFFSET;
+ return (void *) ((unsigned long) p + PAGE_OFFSET);
}
-static inline void *__va(unsigned long phys)
+static inline unsigned long virt_to_logical(void *v)
{
- return (void *) ((unsigned long) (phys) + PAGE_OFFSET);
+// assert(it's a kernel virtual);
+ return (unsigned long) v - PAGE_OFFSET;
}
-static inline struct page *virt_to_page(void *kaddr)
+#ifdef CONFIG_NONLINEAR
+#define MAX_SECTIONS (32)
+#define SECTION_SHIFT 20 /* 1 meg sections */
+#define SECTION_MASK (~(-1 << SECTION_SHIFT))
+
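+/* psection[i]: physical page number of the base of logical section i;
+   vsection[i]: logical page number of the base of physical section i */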
+extern unsigned long psection[MAX_SECTIONS];
+extern unsigned long vsection[MAX_SECTIONS];
+
+static inline unsigned long logical_to_phys(unsigned long p)
{
- return mem_map + (__pa(kaddr) >> PAGE_SHIFT);
+ return (psection[p >> SECTION_SHIFT] << PAGE_SHIFT) + (p & SECTION_MASK);
}
-extern unsigned long max_mapnr;
+static inline unsigned long phys_to_logical(unsigned long p)
+{
+ return (vsection[p >> SECTION_SHIFT] << PAGE_SHIFT) + (p & SECTION_MASK);
+}
-static inline int VALID_PAGE(struct page *page)
+static inline unsigned long ordinal_to_phys(unsigned long n)
{
- return page - mem_map < max_mapnr;
+ return ( psection[n >> (SECTION_SHIFT - PAGE_SHIFT)] +
+ (n & (SECTION_MASK >> PAGE_SHIFT)) ) << PAGE_SHIFT;
+}
+
+static inline unsigned long phys_to_ordinal(unsigned long p)
+{
+ return vsection[p >> SECTION_SHIFT] + ((p & SECTION_MASK) >> PAGE_SHIFT);
+}
+
+#else
+#define logical_to_phys(p) (p)
+#define phys_to_logical(p) (p)
+#define ordinal_to_phys(n) ((n) << PAGE_SHIFT)
+#define phys_to_ordinal(p) ((p) >> PAGE_SHIFT)
+#endif /* CONFIG_NONLINEAR */
+
+static inline struct page *virt_to_page(void *v)
+{
+ return mem_map + (virt_to_logical(v) >> PAGE_SHIFT);
+}
+
+static inline struct page *phys_to_page(unsigned long p)
+{
+ return mem_map + phys_to_ordinal(p);
+}
+
+static inline unsigned long virt_to_phys(void *v)
+{
+ return logical_to_phys(virt_to_logical(v));
+}
+
+static inline void *phys_to_virt(unsigned long p)
+{
+ return logical_to_virt(phys_to_logical(p));
}
#endif
--- ../2.4.17.uml.clean/include/asm-um/pgtable.h Mon Mar 25 17:27:28 2002
+++ ./include/asm-um/pgtable.h Sat Apr 6 11:13:25 2002
@@ -197,9 +197,8 @@
#define page_address(page) ({ if (!(page)->virtual) BUG(); (page)->virtual; })
#define __page_address(page) ({ PAGE_OFFSET + (((page) - mem_map) << PAGE_SHIFT); })
#define pages_to_mb(x) ((x) >> (20-PAGE_SHIFT))
-#define pte_page(x) \
- (mem_map+((unsigned long)((__pa(pte_val(x)) >> PAGE_SHIFT))))
-#define pte_address(x) ((void *) ((unsigned long) pte_val(x) & PAGE_MASK))
+#define pte_page(x) (mem_map + phys_to_ordinal(pte_val(x)))
+#define pte_address(x) (phys_to_virt(pte_val(x) & PAGE_MASK))
static inline pte_t pte_mknewprot(pte_t pte)
{
@@ -313,18 +312,17 @@
* and a page entry and page directory to the page they refer to.
*/
-#define mk_pte(page, pgprot) \
-({ \
- pte_t __pte; \
- \
- pte_val(__pte) = ((unsigned long) __va((page-mem_map)*(unsigned long)PAGE_SIZE + pgprot_val(pgprot))); \
- if(pte_present(__pte)) pte_mknewprot(pte_mknewpage(__pte)); \
- __pte; \
-})
+static inline pte_t mk_pte(struct page *page, pgprot_t pgprot) {
+ pte_t pte;
+ pte_val(pte) = ordinal_to_phys(page - mem_map) + pgprot_val(pgprot);
+ if (pte_present(pte))
+ pte_mknewprot(pte_mknewpage(pte));
+ return pte;
+}
/* This takes a physical page address that is used by the remapping functions */
#define mk_pte_phys(physpage, pgprot) \
-({ pte_t __pte; pte_val(__pte) = physpage + pgprot_val(pgprot); __pte; })
+({ pte_t __pte; pte_val(__pte) = physpage + pgprot_val(pgprot); BUG(); __pte; })
static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
{
--- ../2.4.17.uml.clean/include/linux/bootmem.h Thu Nov 22 20:47:23 2001
+++ ./include/linux/bootmem.h Sat Apr 6 11:18:19 2002
@@ -35,11 +35,11 @@
extern void __init free_bootmem (unsigned long addr, unsigned long size);
extern void * __init __alloc_bootmem (unsigned long size, unsigned long align, unsigned long goal);
#define alloc_bootmem(x) \
- __alloc_bootmem((x), SMP_CACHE_BYTES, __pa(MAX_DMA_ADDRESS))
+ __alloc_bootmem((x), SMP_CACHE_BYTES, virt_to_logical((void *) MAX_DMA_ADDRESS))
#define alloc_bootmem_low(x) \
__alloc_bootmem((x), SMP_CACHE_BYTES, 0)
#define alloc_bootmem_pages(x) \
- __alloc_bootmem((x), PAGE_SIZE, __pa(MAX_DMA_ADDRESS))
+ __alloc_bootmem((x), PAGE_SIZE, virt_to_logical((void *) MAX_DMA_ADDRESS))
#define alloc_bootmem_low_pages(x) \
__alloc_bootmem((x), PAGE_SIZE, 0)
extern unsigned long __init free_all_bootmem (void);
@@ -50,9 +50,9 @@
extern unsigned long __init free_all_bootmem_node (pg_data_t *pgdat);
extern void * __init __alloc_bootmem_node (pg_data_t *pgdat, unsigned long size, unsigned long align, unsigned long goal);
#define alloc_bootmem_node(pgdat, x) \
- __alloc_bootmem_node((pgdat), (x), SMP_CACHE_BYTES, __pa(MAX_DMA_ADDRESS))
+ __alloc_bootmem_node((pgdat), (x), SMP_CACHE_BYTES, virt_to_logical((void *) MAX_DMA_ADDRESS))
#define alloc_bootmem_pages_node(pgdat, x) \
- __alloc_bootmem_node((pgdat), (x), PAGE_SIZE, __pa(MAX_DMA_ADDRESS))
+ __alloc_bootmem_node((pgdat), (x), PAGE_SIZE, virt_to_logical((void *) MAX_DMA_ADDRESS))
#define alloc_bootmem_low_pages_node(pgdat, x) \
__alloc_bootmem_node((pgdat), (x), PAGE_SIZE, 0)
--- ../2.4.17.uml.clean/mm/bootmem.c Fri Dec 21 18:42:04 2001
+++ ./mm/bootmem.c Sat Apr 6 11:47:26 2002
@@ -51,7 +51,7 @@
pgdat_list = pgdat;
mapsize = (mapsize + (sizeof(long) - 1UL)) & ~(sizeof(long) - 1UL);
- bdata->node_bootmem_map = phys_to_virt(mapstart << PAGE_SHIFT);
+ bdata->node_bootmem_map = logical_to_virt(mapstart << PAGE_SHIFT);
bdata->node_boot_start = (start << PAGE_SHIFT);
bdata->node_low_pfn = end;
@@ -214,12 +214,12 @@
areasize = 0;
// last_pos unchanged
bdata->last_offset = offset+size;
- ret = phys_to_virt(bdata->last_pos*PAGE_SIZE + offset +
+ ret = logical_to_virt(bdata->last_pos*PAGE_SIZE + offset +
bdata->node_boot_start);
} else {
remaining_size = size - remaining_size;
areasize = (remaining_size+PAGE_SIZE-1)/PAGE_SIZE;
- ret = phys_to_virt(bdata->last_pos*PAGE_SIZE + offset +
+ ret = logical_to_virt(bdata->last_pos*PAGE_SIZE + offset +
bdata->node_boot_start);
bdata->last_pos = start+areasize-1;
bdata->last_offset = remaining_size;
@@ -228,7 +228,7 @@
} else {
bdata->last_pos = start + areasize - 1;
bdata->last_offset = size & ~PAGE_MASK;
- ret = phys_to_virt(start * PAGE_SIZE + bdata->node_boot_start);
+ ret = logical_to_virt(start * PAGE_SIZE + bdata->node_boot_start);
}
/*
* Reserve the area now:
--- ../2.4.17.uml.clean/mm/memory.c Fri Dec 21 18:42:05 2001
+++ ./mm/memory.c Mon Apr 8 00:09:23 2002
@@ -806,7 +806,7 @@
pte_t oldpage;
oldpage = ptep_get_and_clear(pte);
- page = virt_to_page(__va(phys_addr));
+ page = phys_to_page(phys_addr);
if ((!VALID_PAGE(page)) || PageReserved(page))
set_pte(pte, mk_pte_phys(phys_addr, prot));
forget_pte(oldpage);
--- ../2.4.17.uml.clean/mm/page_alloc.c Tue Nov 20 01:35:40 2001
+++ ./mm/page_alloc.c Sat Apr 6 11:42:43 2002
@@ -735,7 +735,7 @@
struct page *page = mem_map + offset + i;
page->zone = zone;
if (j != ZONE_HIGHMEM)
- page->virtual = __va(zone_start_paddr);
+ page->virtual = logical_to_virt(zone_start_paddr);
zone_start_paddr += PAGE_SIZE;
}