From: Daniel Phillips <phillips@bonn-fries.net>
To: lse-tech@lists.sourceforge.net
Subject: Re: [Lse-tech] [PATCH] Nonlinear kernel virtual to physical mapping for uml
Date: Mon, 8 Apr 2002 01:00:18 +0200

Here's an updated version of the config_nonlinear patch, with improved
factoring of the primary functions:

    unsigned long logical_to_phys(unsigned long p)
    unsigned long phys_to_logical(unsigned long p)
    unsigned long ordinal_to_phys(unsigned long n)
    unsigned long phys_to_ordinal(unsigned long p)

which have the following simple definitions when config_nonlinear is not
defined:

    #define logical_to_phys(p) (p)
    #define phys_to_logical(p) (p)
    #define ordinal_to_phys(n) ((n) << PAGE_SHIFT)
    #define phys_to_ordinal(p) ((p) >> PAGE_SHIFT)

and otherwise, each is a table translation (a standalone toy illustration
of the table version appears at the end of this note, just before the
patch).

I am trying the terminology 'ordinal' in this patch as a substitute for
'pagenum', so we have logical and ordinal, related by PAGE_SHIFT.  I've
adopted the traditional abbreviations 'phys' and 'virt' for physical and
virtual, which makes the patch smaller, but otherwise I don't like those
names much - I'd rather write 'physical_to_virtual' than 'phys_to_virt'.
That's just me though; I'd appreciate opinions.

I've had some time now to think about how this patch relates to
config_numa, and my feeling is that it's orthogonal: it is not a lower
layer for numa.  The practical implication is that config_discontigmem
can be removed entirely, and config_numa can be written in the way that
is best for numa support, freed from the necessity to support the
non-numa discontig usage in certain architectures, for which the
config_nonlinear approach is better in every way.

For 32 bit numa, config_nonlinear is needed in order to provide some
zone_normal memory on every node, mapped to the local memory of that
node.  In this case, the combination of config_nonlinear and config_numa
could be optimized to avoid extra table lookups in some cases.  At this
point, I have not thought deeply about the details.
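To make the table translation concrete, here is a throwaway user-space
sketch - not part of the patch - with PAGE_SHIFT, SECTION_SHIFT and
MAX_SECTIONS shrunk to toy values so the tables can be checked by hand.
It installs the same section-swapping test permutation that
init_nonlinear() uses below, then verifies that the translations invert
each other and that an ordinal is just a logical address shifted down by
PAGE_SHIFT:

    /* Toy model of the config_nonlinear tables: 16 byte "pages",
     * 4 page "sections", 4 sections.  Function bodies mirror the patch. */
    #include <assert.h>
    #include <stdio.h>

    #define PAGE_SHIFT 4                  /* toy value */
    #define SECTION_SHIFT 6               /* toy value */
    #define SECTION_MASK (~(-1UL << SECTION_SHIFT))
    #define MAX_SECTIONS 4                /* toy value */

    unsigned long psection[MAX_SECTIONS]; /* logical section -> physical pagenum */
    unsigned long vsection[MAX_SECTIONS]; /* physical section -> logical pagenum */

    unsigned long logical_to_phys(unsigned long p)
    {
            return (psection[p >> SECTION_SHIFT] << PAGE_SHIFT) + (p & SECTION_MASK);
    }

    unsigned long phys_to_logical(unsigned long p)
    {
            return (vsection[p >> SECTION_SHIFT] << PAGE_SHIFT) + (p & SECTION_MASK);
    }

    unsigned long ordinal_to_phys(unsigned long n)
    {
            return (psection[n >> (SECTION_SHIFT - PAGE_SHIFT)]
                    + (n & (SECTION_MASK >> PAGE_SHIFT))) << PAGE_SHIFT;
    }

    unsigned long phys_to_ordinal(unsigned long p)
    {
            return vsection[p >> SECTION_SHIFT] + ((p & SECTION_MASK) >> PAGE_SHIFT);
    }

    int main(void)
    {
            unsigned long p;
            unsigned i, shift = SECTION_SHIFT - PAGE_SHIFT;

            /* same test permutation as init_nonlinear(): swap sections 2 and 3 */
            for (i = 0; i < MAX_SECTIONS; i++)
                    psection[i] = (i ^ (i >= 2)) << shift;
            for (i = 0; i < MAX_SECTIONS; i++)
                    vsection[psection[i] >> shift] = i << shift;

            for (p = 0; p < MAX_SECTIONS << SECTION_SHIFT; p += 1 << PAGE_SHIFT) {
                    assert(phys_to_logical(logical_to_phys(p)) == p);
                    assert(phys_to_ordinal(logical_to_phys(p)) == p >> PAGE_SHIFT);
                    assert(ordinal_to_phys(p >> PAGE_SHIFT) == logical_to_phys(p));
            }
            printf("logical 0x80 -> physical 0x%lx\n", logical_to_phys(0x80));
            return 0;
    }

It prints 'logical 0x80 -> physical 0xc0', sections 2 and 3 having traded
places; with an identity map in the tables, each lookup degenerates to
the corresponding trivial #define above.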
"(Unknown)": phys_to_virt(addr); #endif } --- ../2.4.17.uml.clean/arch/um/kernel/um_arch.c Mon Mar 25 17:27:27 2002 +++ ./arch/um/kernel/um_arch.c Sat Apr 6 11:18:22 2002 @@ -270,6 +270,44 @@ extern int jail; void *brk_start; +#ifdef CONFIG_NONLINEAR +unsigned long psection[MAX_SECTIONS]; +unsigned long vsection[MAX_SECTIONS]; + +static int init_nonlinear(void) +{ + unsigned i, shift = SECTION_SHIFT - PAGE_SHIFT; + + memset(psection, -1, sizeof(psection)); + memset(vsection, -1, sizeof(vsection)); + for (i = 0; i < MAX_SECTIONS; i++) + psection[i] = (i ^ (i >= 2)) << shift; + + for (i = 0; i < MAX_SECTIONS; i++) + if (~psection[i] && psection[i] >> shift < MAX_SECTIONS) + vsection[psection[i] >> shift] = i << shift; + + return 0; +} + +static void show_nonlinear(void) +{ + int i; + printk(">>> logical section to physical num: "); + for (i = 0; i < MAX_SECTIONS; i++) printk("%lx ", psection[i]); printk("\n"); + printk(">>> physical section to logical num: "); + for (i = 0; i < MAX_SECTIONS; i++) printk("%lx ", vsection[i]); printk("\n"); +} + +#else +# ifndef nil +# define nil do { } while (0) +# endif + +#define init_nonlinear() nil +#define show_nonlinear() nil +#endif + int linux_main(int argc, char **argv) { unsigned long start_pfn, end_pfn, bootmap_size; @@ -294,6 +332,9 @@ /* Start physical memory at least 4M after the current brk */ uml_physmem = ROUND_4M(brk_start) + (1 << 22); + init_nonlinear(); + show_nonlinear(); + setup_machinename(system_utsname.machine); argv1_begin = argv[1]; @@ -322,10 +363,10 @@ setup_memory(); high_physmem = uml_physmem + physmem_size; - start_pfn = PFN_UP(__pa(uml_physmem)); - end_pfn = PFN_DOWN(__pa(high_physmem)); + start_pfn = PFN_UP(virt_to_logical(uml_physmem)); + end_pfn = PFN_DOWN(virt_to_logical(high_physmem)); bootmap_size = init_bootmem(start_pfn, end_pfn - start_pfn); - free_bootmem(__pa(uml_physmem) + bootmap_size, + free_bootmem(virt_to_logical(uml_physmem) + bootmap_size, high_physmem - uml_physmem - bootmap_size); uml_postsetup(); --- ../2.4.17.uml.clean/drivers/char/mem.c Fri Dec 21 18:41:54 2001 +++ ./drivers/char/mem.c Sat Apr 6 11:43:52 2002 @@ -79,7 +79,7 @@ unsigned long end_mem; ssize_t read; - end_mem = __pa(high_memory); + end_mem = virt_to_logical(high_memory); if (p >= end_mem) return 0; if (count > end_mem - p) @@ -101,7 +101,7 @@ } } #endif - if (copy_to_user(buf, __va(p), count)) + if (copy_to_user(buf, logical_to_virt(p), count)) return -EFAULT; read += count; *ppos += read; @@ -114,12 +114,12 @@ unsigned long p = *ppos; unsigned long end_mem; - end_mem = __pa(high_memory); + end_mem = virt_to_logical(high_memory); if (p >= end_mem) return 0; if (count > end_mem - p) count = end_mem - p; - return do_write_mem(file, __va(p), p, buf, count, ppos); + return do_write_mem(file, logical_to_virt(p), p, buf, count, ppos); } #ifndef pgprot_noncached @@ -178,7 +178,7 @@ test_bit(X86_FEATURE_CENTAUR_MCR, &boot_cpu_data.x86_capability) ) && addr >= __pa(high_memory); #else - return addr >= __pa(high_memory); + return addr >= virt_to_phys(high_memory); // bogosity alert!! #endif } @@ -200,7 +200,7 @@ /* * Don't dump addresses that are not real memory to a core file. 
	 */
-	if (offset >= __pa(high_memory) || (file->f_flags & O_SYNC))
+	if (offset >= virt_to_logical(high_memory) || (file->f_flags & O_SYNC))
 		vma->vm_flags |= VM_IO;
 	if (remap_page_range(vma->vm_start, offset, vma->vm_end-vma->vm_start,
--- ../2.4.17.uml.clean/fs/proc/kcore.c	Fri Sep 14 01:04:43 2001
+++ ./fs/proc/kcore.c	Mon Apr 8 00:06:28 2002
@@ -50,7 +50,7 @@
 	memset(&dump, 0, sizeof(struct user));
 	dump.magic = CMAGIC;
-	dump.u_dsize = (virt_to_phys(high_memory) >> PAGE_SHIFT);
+	dump.u_dsize = (virt_to_logical(high_memory) >> PAGE_SHIFT);
 #if defined (__i386__) || defined(__x86_64__)
 	dump.start_code = PAGE_OFFSET;
 #endif
@@ -58,7 +58,7 @@
 	dump.start_data = PAGE_OFFSET;
 #endif
 
-	memsize = virt_to_phys(high_memory);
+	memsize = virt_to_logical(high_memory);
 	if (p >= memsize)
 		return 0;
 	if (count > memsize - p)
@@ -239,7 +239,7 @@
 	phdr->p_flags	= PF_R|PF_W|PF_X;
 	phdr->p_offset	= dataoff;
 	phdr->p_vaddr	= PAGE_OFFSET;
-	phdr->p_paddr	= __pa(PAGE_OFFSET);
+	phdr->p_paddr	= virt_to_phys(PAGE_OFFSET);
 	phdr->p_filesz	= phdr->p_memsz = ((unsigned long)high_memory - PAGE_OFFSET);
 	phdr->p_align	= PAGE_SIZE;
@@ -256,7 +256,7 @@
 		phdr->p_flags	= PF_R|PF_W|PF_X;
 		phdr->p_offset	= (size_t)m->addr - PAGE_OFFSET + dataoff;
 		phdr->p_vaddr	= (size_t)m->addr;
-		phdr->p_paddr	= __pa(m->addr);
+		phdr->p_paddr	= virt_to_phys(m->addr);
 		phdr->p_filesz	= phdr->p_memsz	= m->size;
 		phdr->p_align	= PAGE_SIZE;
 	}
@@ -382,7 +382,7 @@
 	}
 #endif
 	/* fill the remainder of the buffer from kernel VM space */
-	start = (unsigned long)__va(*fpos - elf_buflen);
+	start = (unsigned long) logical_to_virt(*fpos - elf_buflen);
 
 	if ((tsz = (PAGE_SIZE - (start & ~PAGE_MASK))) > buflen)
 		tsz = buflen;
--- ../2.4.17.uml.clean/include/asm-i386/io.h	Wed Mar 27 23:31:33 2002
+++ ./include/asm-i386/io.h	Sat Apr 6 11:13:25 2002
@@ -60,20 +60,6 @@
 #endif
 
 /*
- * Change virtual addresses to physical addresses and vv.
- * These are pretty trivial
- */
-static inline unsigned long virt_to_phys(void *address)
-{
-	return __pa(address);
-}
-
-static inline void * phys_to_virt(unsigned long address)
-{
-	return __va(address);
-}
-
-/*
  * Change "struct page" to physical address.
  */
 #define page_to_phys(page)	((page - mem_map) << PAGE_SHIFT)
--- ../2.4.17.uml.clean/include/asm-um/page.h	Wed Mar 27 23:31:33 2002
+++ ./include/asm-um/page.h	Sat Apr 6 11:12:38 2002
@@ -29,30 +29,81 @@
 #endif	/* __ASSEMBLY__ */
 
+#define __va_space (8*1024*1024)
+
 extern unsigned long uml_physmem;
+extern unsigned long max_mapnr;
 
-#define __va_space (8*1024*1024)
+static inline int VALID_PAGE(struct page *page)
+{
+	return page - mem_map < max_mapnr;
+}
 
-static inline unsigned long __pa(void *virt)
+static inline void *logical_to_virt(unsigned long p)
 {
-	return (unsigned long) (virt) - PAGE_OFFSET;
+	return (void *) ((unsigned long) p + PAGE_OFFSET);
 }
 
-static inline void *__va(unsigned long phys)
+static inline unsigned long virt_to_logical(void *v)
 {
-	return (void *) ((unsigned long) (phys) + PAGE_OFFSET);
+//	assert(it's a kernel virtual);
+	return (unsigned long) v - PAGE_OFFSET;
 }
 
-static inline struct page *virt_to_page(void *kaddr)
+#ifdef CONFIG_NONLINEAR
+#define MAX_SECTIONS (32)
+#define SECTION_SHIFT 20	/* 1 meg sections */
+#define SECTION_MASK (~(-1 << SECTION_SHIFT))
+
+extern unsigned long psection[MAX_SECTIONS];
+extern unsigned long vsection[MAX_SECTIONS];
+
+static inline unsigned long logical_to_phys(unsigned long p)
 {
-	return mem_map + (__pa(kaddr) >> PAGE_SHIFT);
+	return (psection[p >> SECTION_SHIFT] << PAGE_SHIFT) + (p & SECTION_MASK);
 }
 
-extern unsigned long max_mapnr;
+static inline unsigned long phys_to_logical(unsigned long p)
+{
+	return (vsection[p >> SECTION_SHIFT] << PAGE_SHIFT) + (p & SECTION_MASK);
+}
 
-static inline int VALID_PAGE(struct page *page)
+static inline unsigned long ordinal_to_phys(unsigned long n)
 {
-	return page - mem_map < max_mapnr;
+	return ( psection[n >> (SECTION_SHIFT - PAGE_SHIFT)]
+		+ (n & (SECTION_MASK >> PAGE_SHIFT)) ) << PAGE_SHIFT;
+}
+
+static inline unsigned long phys_to_ordinal(unsigned long p)
+{
+	return vsection[p >> SECTION_SHIFT] + ((p & SECTION_MASK) >> PAGE_SHIFT);
+}
+
+#else
+#define logical_to_phys(p) (p)
+#define phys_to_logical(p) (p)
+#define ordinal_to_phys(n) ((n) << PAGE_SHIFT)
+#define phys_to_ordinal(p) ((p) >> PAGE_SHIFT)
+#endif /* CONFIG_NONLINEAR */
+
+static inline struct page *virt_to_page(void *v)
+{
+	return mem_map + (virt_to_logical(v) >> PAGE_SHIFT);
+}
+
+static inline struct page *phys_to_page(unsigned long p)
+{
+	return mem_map + phys_to_ordinal(p);
+}
+
+static inline unsigned long virt_to_phys(void *v)
+{
+	return logical_to_phys(virt_to_logical(v));
+}
+
+static inline void *phys_to_virt(unsigned long p)
+{
+	return logical_to_virt(phys_to_logical(p));
 }
 
 #endif
--- ../2.4.17.uml.clean/include/asm-um/pgtable.h	Mon Mar 25 17:27:28 2002
+++ ./include/asm-um/pgtable.h	Sat Apr 6 11:13:25 2002
@@ -197,9 +197,8 @@
 #define page_address(page) ({ if (!(page)->virtual) BUG(); (page)->virtual; })
 #define __page_address(page) ({ PAGE_OFFSET + (((page) - mem_map) << PAGE_SHIFT); })
 #define pages_to_mb(x) ((x) >> (20-PAGE_SHIFT))
-#define pte_page(x) \
-	(mem_map+((unsigned long)((__pa(pte_val(x)) >> PAGE_SHIFT))))
-#define pte_address(x) ((void *) ((unsigned long) pte_val(x) & PAGE_MASK))
+#define pte_page(x) (mem_map + phys_to_ordinal(pte_val(x)))
+#define pte_address(x) (phys_to_virt(pte_val(x) & PAGE_MASK))
 
 static inline pte_t pte_mknewprot(pte_t pte)
 {
@@ -313,18 +312,17 @@
  * and a page entry and page directory to the page they refer to.
  */
-#define mk_pte(page, pgprot) \
-({					\
-	pte_t __pte;			\
-					\
-	pte_val(__pte) = ((unsigned long) __va((page-mem_map)*(unsigned long)PAGE_SIZE + pgprot_val(pgprot))); \
-	if(pte_present(__pte)) pte_mknewprot(pte_mknewpage(__pte)); \
-	__pte; \
-})
+static inline pte_t mk_pte(struct page *page, pgprot_t pgprot) {
+	pte_t pte;
+	pte_val(pte) = ordinal_to_phys(page - mem_map) + pgprot_val(pgprot);
+	if (pte_present(pte))
+		pte_mknewprot(pte_mknewpage(pte));
+	return pte;
+}
 
 /* This takes a physical page address that is used by the remapping functions */
 #define mk_pte_phys(physpage, pgprot) \
-({ pte_t __pte; pte_val(__pte) = physpage + pgprot_val(pgprot); __pte; })
+({ pte_t __pte; pte_val(__pte) = physpage + pgprot_val(pgprot); BUG(); __pte; })
 
 static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
 {
--- ../2.4.17.uml.clean/include/linux/bootmem.h	Thu Nov 22 20:47:23 2001
+++ ./include/linux/bootmem.h	Sat Apr 6 11:18:19 2002
@@ -35,11 +35,11 @@
 extern void __init free_bootmem (unsigned long addr, unsigned long size);
 extern void * __init __alloc_bootmem (unsigned long size, unsigned long align, unsigned long goal);
 #define alloc_bootmem(x) \
-	__alloc_bootmem((x), SMP_CACHE_BYTES, __pa(MAX_DMA_ADDRESS))
+	__alloc_bootmem((x), SMP_CACHE_BYTES, virt_to_logical((void *) MAX_DMA_ADDRESS))
 #define alloc_bootmem_low(x) \
 	__alloc_bootmem((x), SMP_CACHE_BYTES, 0)
 #define alloc_bootmem_pages(x) \
-	__alloc_bootmem((x), PAGE_SIZE, __pa(MAX_DMA_ADDRESS))
+	__alloc_bootmem((x), PAGE_SIZE, virt_to_logical((void *) MAX_DMA_ADDRESS))
 #define alloc_bootmem_low_pages(x) \
 	__alloc_bootmem((x), PAGE_SIZE, 0)
 extern unsigned long __init free_all_bootmem (void);
@@ -50,9 +50,9 @@
 extern unsigned long __init free_all_bootmem_node (pg_data_t *pgdat);
 extern void * __init __alloc_bootmem_node (pg_data_t *pgdat, unsigned long size, unsigned long align, unsigned long goal);
 #define alloc_bootmem_node(pgdat, x) \
-	__alloc_bootmem_node((pgdat), (x), SMP_CACHE_BYTES, __pa(MAX_DMA_ADDRESS))
+	__alloc_bootmem_node((pgdat), (x), SMP_CACHE_BYTES, virt_to_logical((void *) MAX_DMA_ADDRESS))
 #define alloc_bootmem_pages_node(pgdat, x) \
-	__alloc_bootmem_node((pgdat), (x), PAGE_SIZE, __pa(MAX_DMA_ADDRESS))
+	__alloc_bootmem_node((pgdat), (x), PAGE_SIZE, virt_to_logical((void *) MAX_DMA_ADDRESS))
 #define alloc_bootmem_low_pages_node(pgdat, x) \
 	__alloc_bootmem_node((pgdat), (x), PAGE_SIZE, 0)
--- ../2.4.17.uml.clean/mm/bootmem.c	Fri Dec 21 18:42:04 2001
+++ ./mm/bootmem.c	Sat Apr 6 11:47:26 2002
@@ -51,7 +51,7 @@
 	pgdat_list = pgdat;
 
 	mapsize = (mapsize + (sizeof(long) - 1UL)) & ~(sizeof(long) - 1UL);
-	bdata->node_bootmem_map = phys_to_virt(mapstart << PAGE_SHIFT);
+	bdata->node_bootmem_map = logical_to_virt(mapstart << PAGE_SHIFT);
 	bdata->node_boot_start = (start << PAGE_SHIFT);
 	bdata->node_low_pfn = end;
@@ -214,12 +214,12 @@
 			areasize = 0;
 			// last_pos unchanged
 			bdata->last_offset = offset+size;
-			ret = phys_to_virt(bdata->last_pos*PAGE_SIZE + offset +
+			ret = logical_to_virt(bdata->last_pos*PAGE_SIZE + offset +
 				bdata->node_boot_start);
 		} else {
 			remaining_size = size - remaining_size;
 			areasize = (remaining_size+PAGE_SIZE-1)/PAGE_SIZE;
-			ret = phys_to_virt(bdata->last_pos*PAGE_SIZE + offset +
+			ret = logical_to_virt(bdata->last_pos*PAGE_SIZE + offset +
 				bdata->node_boot_start);
 			bdata->last_pos = start+areasize-1;
 			bdata->last_offset = remaining_size;
@@ -228,7 +228,7 @@
 	} else {
 		bdata->last_pos = start + areasize - 1;
 		bdata->last_offset = size & ~PAGE_MASK;
-		ret = phys_to_virt(start * PAGE_SIZE + bdata->node_boot_start);
+		ret = logical_to_virt(start * PAGE_SIZE + bdata->node_boot_start);
 	}
 	/*
 	 * Reserve the area now:
--- ../2.4.17.uml.clean/mm/memory.c	Fri Dec 21 18:42:05 2001
+++ ./mm/memory.c	Mon Apr 8 00:09:23 2002
@@ -806,7 +806,7 @@
 		pte_t oldpage;
 		oldpage = ptep_get_and_clear(pte);
 
-		page = virt_to_page(__va(phys_addr));
+		page = phys_to_page(phys_addr);
 		if ((!VALID_PAGE(page)) || PageReserved(page))
 			set_pte(pte, mk_pte_phys(phys_addr, prot));
 		forget_pte(oldpage);
--- ../2.4.17.uml.clean/mm/page_alloc.c	Tue Nov 20 01:35:40 2001
+++ ./mm/page_alloc.c	Sat Apr 6 11:42:43 2002
@@ -735,7 +735,7 @@
 		struct page *page = mem_map + offset + i;
 		page->zone = zone;
 		if (j != ZONE_HIGHMEM)
-			page->virtual = __va(zone_start_paddr);
+			page->virtual = logical_to_virt(zone_start_paddr);
 		zone_start_paddr += PAGE_SIZE;
 	}