From:	 William Lee Irwin III <wli@holomorphy.com>
To:	 linux-mm@kvack.org
Subject: per-cpu pte_chain freelists
Date:	 Tue, 21 May 2002 16:02:14 -0700
Cc:	 linux-kernel@vger.kernel.org

Seems to do a little better than per-zone.

Cheers,
Bill
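
The idea, as a minimal standalone sketch (not code from the patch: NR_CPUS,
this_cpu() and the refill size of 16 below are illustrative stand-ins for the
kernel's NR_CPUS, smp_processor_id() and PTE_CHAINS_PER_PAGE, and the zone is
left out): each zone keeps one pte_chain freelist per CPU, indexed by the
current CPU, so the old global pte_chain_freelist_lock can go away and each
CPU only ever touches its own list.

	/*
	 * Simplified user-space sketch of per-cpu pte_chain freelists.
	 * The real pte_chain also carries a ptep; it is omitted here.
	 */
	#include <stdio.h>
	#include <stdlib.h>

	#define NR_CPUS	4

	struct pte_chain {
		struct pte_chain *next;
	};

	struct pte_chain_freelist {
		unsigned long		count;
		struct pte_chain	*list;
	};

	/* One freelist per CPU; no lock, each CPU uses only its own entry. */
	static struct pte_chain_freelist freelists[NR_CPUS];

	/* Stand-in for smp_processor_id(). */
	static int this_cpu(void)
	{
		return 0;
	}

	static void pte_chain_push(struct pte_chain **list, struct pte_chain *pte_chain)
	{
		pte_chain->next = *list;
		*list = pte_chain;
	}

	static struct pte_chain *pte_chain_pop(struct pte_chain **list)
	{
		struct pte_chain *pte_chain = *list;

		*list = pte_chain->next;
		pte_chain->next = NULL;
		return pte_chain;
	}

	static void pte_chain_free(struct pte_chain *pte_chain)
	{
		int cpu = this_cpu();

		pte_chain_push(&freelists[cpu].list, pte_chain);
		freelists[cpu].count++;
	}

	static struct pte_chain *pte_chain_alloc(void)
	{
		int cpu = this_cpu();

		/* Refill the local freelist when it runs dry. */
		if (!freelists[cpu].list) {
			struct pte_chain *chunk = calloc(16, sizeof(*chunk));
			int i;

			if (!chunk)
				abort();
			for (i = 0; i < 16; i++)
				pte_chain_push(&freelists[cpu].list, &chunk[i]);
			freelists[cpu].count += 16;
		}

		freelists[cpu].count--;
		return pte_chain_pop(&freelists[cpu].list);
	}

	int main(void)
	{
		struct pte_chain *pc = pte_chain_alloc();

		printf("allocated %p, %lu left on cpu %d\n",
		       (void *)pc, freelists[this_cpu()].count, this_cpu());
		pte_chain_free(pc);
		printf("freed, %lu on cpu %d\n",
		       freelists[this_cpu()].count, this_cpu());
		return 0;
	}

In the kernel, the index comes from smp_processor_id() and the refill grabs a
fresh page with get_zeroed_page(GFP_ATOMIC), as in the patch below.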

# This is a BitKeeper generated patch for the following project:
# Project Name: Linux kernel tree
# This patch format is intended for GNU patch command version 2.5 or higher.
# This patch includes the following deltas:
#	           ChangeSet	1.423   -> 1.424  
#	           mm/rmap.c	1.7     -> 1.8    
#	include/linux/mmzone.h	1.11    -> 1.12   
#	     mm/page_alloc.c	1.47    -> 1.48   
#
# The following is the BitKeeper ChangeSet Log
# --------------------------------------------
# 02/05/21	wli@tisifone.holomorphy.com	1.424
# per-cpu pte_chain freelists
# --------------------------------------------
#
diff --minimal -Nru a/include/linux/mmzone.h b/include/linux/mmzone.h
--- a/include/linux/mmzone.h	Tue May 21 15:55:48 2002
+++ b/include/linux/mmzone.h	Tue May 21 15:55:48 2002
@@ -29,6 +29,11 @@
 
 #define MAX_CHUNKS_PER_NODE 8
 
+struct pte_chain_freelist {
+	unsigned long		count;
+	struct pte_chain 	*list;
+};
+
 /*
  * On machines where it is needed (eg PCs) we divide physical memory
  * into multiple physical zones. On a PC we have 3 zones:
@@ -56,8 +61,8 @@
 	struct list_head	inactive_dirty_list;
 	struct list_head	inactive_clean_list;
 	free_area_t		free_area[MAX_ORDER];
-	spinlock_t		pte_chain_freelist_lock;
-	struct pte_chain	*pte_chain_freelist;
+
+	struct pte_chain_freelist pte_chain_freelists[NR_CPUS];
 
 	/*
 	 * wait_table		-- the array holding the hash table
diff --minimal -Nru a/mm/page_alloc.c b/mm/page_alloc.c
--- a/mm/page_alloc.c	Tue May 21 15:55:48 2002
+++ b/mm/page_alloc.c	Tue May 21 15:55:48 2002
@@ -908,11 +908,13 @@
 		zone->inactive_clean_pages = 0;
 		zone->inactive_dirty_pages = 0;
 		zone->need_balance = 0;
-		zone->pte_chain_freelist = NULL;
 		INIT_LIST_HEAD(&zone->active_list);
 		INIT_LIST_HEAD(&zone->inactive_dirty_list);
 		INIT_LIST_HEAD(&zone->inactive_clean_list);
-		spin_lock_init(&zone->pte_chain_freelist_lock);
+		for (i = 0; i < NR_CPUS; ++i) {
+			zone->pte_chain_freelists[i].count = 0;
+			zone->pte_chain_freelists[i].list = NULL;
+		}
 
 		if (!size)
 			continue;
diff --minimal -Nru a/mm/rmap.c b/mm/rmap.c
--- a/mm/rmap.c	Tue May 21 15:55:48 2002
+++ b/mm/rmap.c	Tue May 21 15:55:48 2002
@@ -51,7 +51,7 @@
 static inline struct pte_chain * pte_chain_alloc(zone_t *);
 static inline void pte_chain_free(struct pte_chain *, struct pte_chain *,
 		struct page *, zone_t *);
-static void alloc_new_pte_chains(zone_t *);
+static void alloc_new_pte_chains(zone_t *, int cpu);
 
 /**
  * page_referenced - test if the page was referenced
@@ -335,25 +335,36 @@
  ** functions.
  **/
 
-static inline void pte_chain_push(zone_t * zone,
-		struct pte_chain * pte_chain)
+static inline void pte_chain_push(struct pte_chain ** list, struct pte_chain * pte_chain)
 {
 	pte_chain->ptep = NULL;
-	pte_chain->next = zone->pte_chain_freelist;
-	zone->pte_chain_freelist = pte_chain;
+	pte_chain->next = *list;
+	*list = pte_chain;
 }
 
-static inline struct pte_chain * pte_chain_pop(zone_t * zone)
+static inline struct pte_chain * pte_chain_pop(struct pte_chain ** list)
 {
 	struct pte_chain *pte_chain;
 
-	pte_chain = zone->pte_chain_freelist;
-	zone->pte_chain_freelist = pte_chain->next;
+	pte_chain = *list;
+	*list = pte_chain->next;
 	pte_chain->next = NULL;
 
 	return pte_chain;
 }
 
+static inline void add_to_pte_chain_freelist(struct pte_chain_freelist * freelists, int cpu, struct pte_chain * pte_chain)
+{
+	pte_chain_push(&freelists[cpu].list, pte_chain);
+	freelists[cpu].count++;
+}
+
+static inline struct pte_chain * del_from_pte_chain_freelist(struct pte_chain_freelist * freelists, int cpu)
+{
+	freelists[cpu].count--;
+	return pte_chain_pop(&freelists[cpu].list);
+}
+
 /**
  * pte_chain_free - free pte_chain structure
  * @pte_chain: pte_chain struct to free
@@ -370,14 +381,17 @@
 		struct pte_chain * prev_pte_chain, struct page * page,
 		zone_t * zone)
 {
+	int cpu = smp_processor_id();
+	struct pte_chain_freelist *freelists;
+
+	freelists = zone->pte_chain_freelists;
+
 	if (prev_pte_chain)
 		prev_pte_chain->next = pte_chain->next;
 	else if (page)
 		page->pte_chain = pte_chain->next;
 
-	spin_lock(&zone->pte_chain_freelist_lock);
-	pte_chain_push(zone, pte_chain);
-	spin_unlock(&zone->pte_chain_freelist_lock);
+	add_to_pte_chain_freelist(freelists, cpu, pte_chain);
 }
 
 /**
@@ -391,17 +405,19 @@
 static inline struct pte_chain * pte_chain_alloc(zone_t * zone)
 {
 	struct pte_chain * pte_chain;
+	struct pte_chain_freelist *freelists;
+	int cpu = smp_processor_id();
+
+	freelists = zone->pte_chain_freelists;
 
-	spin_lock(&zone->pte_chain_freelist_lock);
 
 	/* Allocate new pte_chain structs as needed. */
-	if (!zone->pte_chain_freelist)
-		alloc_new_pte_chains(zone);
+	if (!freelists[cpu].list)
+		alloc_new_pte_chains(zone, cpu);
 
 	/* Grab the first pte_chain from the freelist. */
-	pte_chain = pte_chain_pop(zone);
+	pte_chain = del_from_pte_chain_freelist(freelists, cpu);
 
-	spin_unlock(&zone->pte_chain_freelist_lock);
 
 	return pte_chain;
 }
@@ -409,6 +425,7 @@
 /**
  * alloc_new_pte_chains - convert a free page to pte_chain structures
  * @zone: memory zone to allocate pte_chains for
+ * @cpu:  cpu pte_chains are to be allocated for
  *
  * Grabs a free page and converts it to pte_chain structures. We really
  * should pre-allocate these earlier in the pagefault path or come up
@@ -416,18 +433,33 @@
  *
  * Note that we cannot use the slab cache because the pte_chain structure
  * is way smaller than the minimum size of a slab cache allocation.
- * Caller needs to hold the zone->pte_chain_freelist_lock
+ * Caller must be running on @cpu; the per-cpu freelists are not locked.
  */
-static void alloc_new_pte_chains(zone_t *zone)
+
+#define PTE_CHAINS_PER_PAGE (PAGE_SIZE/sizeof(struct pte_chain))
+
+static void alloc_new_pte_chains(zone_t *zone, int cpu)
 {
-	struct pte_chain * pte_chain = (void *) get_zeroed_page(GFP_ATOMIC);
-	int i = PAGE_SIZE / sizeof(struct pte_chain);
+	struct pte_chain * pte_chain;
+	struct pte_chain_freelist *freelists = zone->pte_chain_freelists;
+	int i;
 
-	if (pte_chain) {
-		for (; i-- > 0; pte_chain++)
-			pte_chain_push(zone, pte_chain);
-	} else {
-		/* Yeah yeah, I'll fix the pte_chain allocation ... */
+	/*
+	 * Atomically allocate a page and hand it back. Things are not
+	 * highly unbalanced or there is good reason to allocate, so
+	 * actually get a fresh page.
+	 */
+	pte_chain = (void *) get_zeroed_page(GFP_ATOMIC);
+
+	/* Yeah yeah, I'll fix the pte_chain allocation ... */
+	if (!pte_chain)
 		panic("Fix pte_chain allocation, you lazy bastard!\n");
-	}
+
+	/*
+	 * Be greedy and give ourselves the chains. If some cpu wants
+	 * them, it'll eventually end up taking them above.
+	 */
+	freelists[cpu].count += PTE_CHAINS_PER_PAGE;
+	for (i = 0; i < PTE_CHAINS_PER_PAGE; ++i)
+		pte_chain_push(&freelists[cpu].list, &pte_chain[i]);
 }
-