
Signed-off-by: Dave Hansen <haveblue@us.ibm.com>
---

 memhotplug-dave/arch/i386/mm/init.c   |    5 -
 memhotplug-dave/drivers/base/memory.c |   20 ++---
 memhotplug-dave/mm/memory_hotplug.c   |  117 +++++++++++++++++++++++++++++++++-
 memhotplug-dave/mm/nonlinear.c        |   42 +++++++++++-
 memhotplug-dave/mm/page_alloc.c       |   64 +++++++++++++-----
 mm/vmscan.c                           |    0 
 6 files changed, 214 insertions(+), 34 deletions(-)

diff -puN mm/memory_hotplug.c~P-i386-debug mm/memory_hotplug.c
--- memhotplug/mm/memory_hotplug.c~P-i386-debug	2004-08-20 11:28:24.000000000 -0700
+++ memhotplug-dave/mm/memory_hotplug.c	2004-08-20 11:28:24.000000000 -0700
@@ -23,7 +23,7 @@
 
 #include <asm/tlbflush.h>
 
-static struct page *__kmalloc_section_memmap(unsigned long nr_pages)
+struct page *__kmalloc_section_memmap(unsigned long nr_pages)
 {
 	struct page *page, *ret;
 	unsigned long memmap_size = sizeof(struct page) * nr_pages;
@@ -33,6 +33,11 @@ static struct page *__kmalloc_section_me
 		printk("Failed to allocate new memmap!\n");
 		return NULL;
 	}
+	/*
+	 * A sneaky way to store from where we got the memory
+	 * for this mem_map[]
+	 */
+	page->mapping = __kmalloc_section_memmap;
 
 	ret = (struct page *)pfn_to_kaddr(page_to_pfn(page));
 	memset(ret, 0, memmap_size);
@@ -107,6 +112,100 @@ int online_pages(unsigned long lpfn, uns
 	return 0;
 }
 
+/*
+ * Clear every populated pte mapping the nr_pages pages at start_address.
+ * (These helpers probably belong next to other pagetable handling code.)
+ */
+void destroy_pmd_mapping(pmd_t *pmd, char *start_address, unsigned long nr_pages)
+{
+	char *cursor = start_address;
+	char *limit = start_address + (nr_pages * PAGE_SIZE);
+
+	/* nothing to do for an empty pmd */
+	if (pmd_none(*pmd))
+		return;
+
+	while (cursor < limit) {
+		pte_t *ptep = pte_offset_map(pmd, (unsigned long)cursor);
+
+		/*
+		 * Only populated entries get cleared.  The opposite of
+		 * ppc64 create_pte_mapping() would also be called here.
+		 */
+		if (!pte_none(*ptep))
+			pte_clear(ptep);
+
+		pte_unmap(ptep);
+		cursor += PAGE_SIZE;
+	}
+}
+
+
+
+#include <linux/hugetlb.h>
+/* Unmap nr_pages pages at start_address, one pmd's worth of range at a time. */
+void destroy_pgd_mapping(pgd_t *pgd, char *start_address, unsigned long nr_pages)
+{
+	unsigned long va = (unsigned long)start_address;
+	unsigned long chunk;
+	pmd_t *pmd;
+
+	if (pgd_none(*pgd))
+		return;
+
+	for (; nr_pages; va += chunk * PAGE_SIZE, nr_pages -= chunk) {
+		/* pages from va to the end of this pmd's span, capped to the range */
+		chunk = (((va | ~PMD_MASK) - va) / PAGE_SIZE) + 1;
+		if (chunk > nr_pages)
+			chunk = nr_pages;
+		pmd = pmd_offset(pgd, va);
+		if (pmd_huge(*pmd))
+			pmd_clear(pmd);	/* opposite of ppc64 create_pte_mapping() goes here */
+		else
+			destroy_pmd_mapping(pmd, (char *)va, chunk);
+	}
+}
+
+
+/* Unmap the range from mm, handing each pgd entry only the pages beneath it. */
+void destroy_mm_mapping(struct mm_struct *mm, char *start_address, unsigned long nr_pages)
+{
+	unsigned long va = (unsigned long)start_address;
+	unsigned long chunk;
+
+	/* page table lock goes here :) */
+	for (; nr_pages; va += chunk * PAGE_SIZE, nr_pages -= chunk) {
+		chunk = (((va | ~PGDIR_MASK) - va) / PAGE_SIZE) + 1;
+		if (chunk > nr_pages)	/* final entry: only what is left of the range */
+			chunk = nr_pages;
+		destroy_pgd_mapping(pgd_offset(mm, va), (char *)va, chunk);
+	}
+}
+
+/*
+ * Remove the kernel mapping of nr_pages pages, beginning at start_pfn,
+ * from each mm reached by walking current->mm's mmlist.
+ * Could probably go in some pgtable.c file.
+ */
+void destroy_kernel_mapping(unsigned long start_pfn, unsigned long nr_pages)
+{
+	char *kaddr = pfn_to_kaddr(start_pfn);
+	struct list_head *pos;
+
+	/* need mmlist lock */
+	list_for_each(pos, &current->mm->mmlist) {
+		destroy_mm_mapping(list_entry(pos, struct mm_struct, mmlist),
+				   kaddr, nr_pages);
+	}
+
+	/*
+	 * A single global flush once the walk is finished.  Flushing for
+	 * each and every PTE would also work, but this is a lot easier
+	 * to code, and removing memory is a rare operation anyway.
+	 */
+	flush_tlb_all();
+}
+
 int __remove_pages(struct zone *zone, unsigned long start_pfn,
 		unsigned long nr_pages, unsigned long attr)
 {
@@ -115,6 +214,20 @@ int __remove_pages(struct zone *zone, un
 	 */
 	if (nr_pages != 1<<(get_order(nr_pages)+PAGE_SHIFT))
 		return -EINVAL;
-	return capture_page_range(start_pfn, get_order(nr_pages));
+	printk("%s(%p, %ld, %ld, %08lx) 0\n", __func__, zone, start_pfn, nr_pages, attr);
+	capture_page_range(start_pfn, get_order(nr_pages<<PAGE_SHIFT));
+	printk("%s() 1\n", __func__);
+
+	/*
+	 * We might still need the struct pages here, so do this
+	 * before killing the virt/phys structures
+	 */
+	if (is_highmem(zone))
+		destroy_kernel_mapping(start_pfn, nr_pages);
+
+	invalidate_phys_mapping(start_pfn, nr_pages);
+	printk("%s() done\n", __func__);
+
+	return 0;
 }
 
diff -puN mm/page_alloc.c~P-i386-debug mm/page_alloc.c
--- memhotplug/mm/page_alloc.c~P-i386-debug	2004-08-20 11:28:24.000000000 -0700
+++ memhotplug-dave/mm/page_alloc.c	2004-08-20 11:28:24.000000000 -0700
@@ -81,9 +81,10 @@ static void bad_page(const char *functio
 {
 	printk(KERN_EMERG "Bad page state at %s (in process '%s', page %p)\n",
 		function, current->comm, page);
-	printk(KERN_EMERG "flags:0x%08lx mapping:%p mapcount:%d count:%d\n",
+	printk(KERN_EMERG "flags:0x%08lx mapping:%p mapcount:%d count:%d",
 		(unsigned long)page->flags, page->mapping,
 		page_mapcount(page), page_count(page));
+	printk(KERN_EMERG " pfn: %ld\n", page_to_pfn(page));
 	printk(KERN_EMERG "Backtrace:\n");
 	dump_stack();
 	printk(KERN_EMERG "Trying to fix it up, but a reboot is needed\n");
@@ -97,6 +98,7 @@ static void bad_page(const char *functio
 	set_page_count(page, 0);
 	reset_page_mapcount(page);
 	page->mapping = NULL;
+	machine_restart(NULL);
 }
 
 #ifndef CONFIG_HUGETLB_PAGE
@@ -302,6 +304,8 @@ static inline void capture_pages(struct 
 	int i;
 	for (i = 0; i < (1 << order); i++)
 		clear_page_under_capture(&page[i]);
+	page_zone(page)->present_pages -= (1UL << order);
+	totalram_pages -= (1UL << order);
 }
 
 static inline int test_remove_range(struct page *page, struct page *base,
@@ -564,16 +568,31 @@ void drain_local_pages(void)
 #endif /* CONFIG_PM */
 
 #ifdef CONFIG_MEMORY_HOTPLUG
-static inline int first_uncaptured_page(unsigned long start_pfn, int nr_pages)
+static inline int first_uncaptured_page(unsigned long start_pfn, unsigned long end_pfn)
 {
 	int i;
+	/* pfn of the first page in [start_pfn, end_pfn) under capture, or -1 */
+	int res = -1;
 
-	for (i = 0; i < nr_pages; i++) {
-		struct page *page = pfn_to_page(start_pfn + i);
-		if (page_under_capture(page))
-			return i;
+	printk("%s(%ld, %ld) begin\n",
+			__func__, start_pfn, end_pfn
+			);
+
+	for (i = start_pfn; i < end_pfn; i++) {
+		struct page *page = pfn_to_page(i);
+
+		if (!page_under_capture(page))
+			continue;
+		/*
+		 * Only the first hit is reported; stop scanning here.
+		 */
+		res = i;
+		break;
 	}
-	return -1;
+	printk("%s(%ld, %ld) end\n",
+			__func__, start_pfn, end_pfn
+			);
+	return res;
 }
 
 static void
@@ -597,9 +616,11 @@ rmb_and_drain_cpu_pages(void * __unused)
  */
 int capture_page_range(unsigned long start_pfn, int order)
 {
-	int fp = 0;
+	int fup;
 	struct page *page;
-	int i, nr_pages;
+	int i;
+	unsigned long nr_pages;
+	unsigned long end_pfn;
 
 	/* If the start_pfn is not aligned with the order return failure */
 	if (start_pfn % (1 << order) != 0)
@@ -621,13 +642,14 @@ int capture_page_range(unsigned long sta
 
 	page = pfn_to_page(start_pfn);
 	remove_page_freearea(page, order);
-	nr_pages = 1<<order;
-	/*
-	 * storing the last result (fp) keeps up from having
-	 * to walk the entire range each time
-	 */
-	while ((fp = first_uncaptured_page(start_pfn + fp, nr_pages - fp)) >= 0)
-		msleep(100);
+	nr_pages = 1UL<<order;
+
+	fup = start_pfn;
+	end_pfn = start_pfn + nr_pages;
+	while((fup = first_uncaptured_page(fup, end_pfn)) >= 0)
+		msleep(4000);
+
+	printk("%s() done\n", __func__);
 
 	return 0;
 }
@@ -1007,7 +1029,7 @@ rebalance:
 	}
 
 nopage:
-	if (!(gfp_mask & __GFP_NOWARN) && printk_ratelimit()) {
+	if (0 && !(gfp_mask & __GFP_NOWARN) && printk_ratelimit()) {
 		printk(KERN_WARNING "%s: page allocation failure."
 			" order:%d, mode:0x%x\n",
 			p->comm, order, gfp_mask);
@@ -1661,9 +1683,13 @@ static void __init calculate_zone_totalp
 void __devinit memmap_init_zone(unsigned long size, int nid, unsigned long zone,
 		unsigned long start_pfn)
 {
-	struct page *start = pfn_to_page(start_pfn);
+	struct page *start;
 	struct page *page;
 
+	printk("%s() 0\n", __func__);
+	start = pfn_to_page(start_pfn);
+	printk("%s() 1\n", __func__);
+
 	for (page = start; page < (start + size); page++) {
 		set_page_zone(page, NODEZONE(nid, zone));
 		set_page_count(page, 0);
@@ -1677,6 +1703,7 @@ void __devinit memmap_init_zone(unsigned
 #endif
 		start_pfn++;
 	}
+	printk("%s() 2\n", __func__);
 }
 
 /*
@@ -2014,6 +2041,7 @@ void __init free_area_init_node(int nid,
 	pgdat->node_start_pfn = node_start_pfn;
 	calculate_zone_totalpages(pgdat, zones_size, zholes_size);
 
+	printk("%s()\n", __func__);
 	alloc_node_mem_map(pgdat);
 
 	free_area_init_core(pgdat, zones_size, zholes_size);
diff -puN include/linux/mm.h~P-i386-debug include/linux/mm.h
diff -puN include/asm-i386/page.h~P-i386-debug include/asm-i386/page.h
diff -puN arch/i386/Kconfig~P-i386-debug arch/i386/Kconfig
diff -puN mm/nonlinear.c~P-i386-debug mm/nonlinear.c
--- memhotplug/mm/nonlinear.c~P-i386-debug	2004-08-20 11:28:24.000000000 -0700
+++ memhotplug-dave/mm/nonlinear.c	2004-08-20 11:28:24.000000000 -0700
@@ -46,6 +46,8 @@ setup_memsections(void)
 {
 	int index;
 
+	printk("%s()\n", __func__);
+
 	for (index = 0; index < NR_SECTIONS; index++) {
 		mem_section[index].phys_section = INVALID_SECTION;
 		mem_section[index].mem_map = NULL;
@@ -64,17 +66,23 @@ alloc_memsections(unsigned long start_pf
 	unsigned int sect_count;
 	unsigned short sect_index;
 
+	printk("%s(%08lx, %08lx, %08lx)\n", __func__, start_pfn, start_phys_pfn, pfn_count);
+
 	sect_count = pfn_to_section_roundup(pfn_count);
 	sect_index = index = pfn_to_section(start_pfn);
 	limit = index + sect_count;
 	physid = pfn_to_section(start_phys_pfn);
-	for (; index < limit; index++, physid++)
+	for (; index < limit; index++, physid++) {
 		mem_section[index].phys_section = physid;
+		printk("set mem_section[%d].phys_section: %d\n", index, mem_section[index].phys_section);
+	}
 
 	index = pfn_to_section(start_phys_pfn);
 	limit = index + sect_count;
-	for (; index < limit; index++, sect_index++)
+	for (; index < limit; index++, sect_index++) {
 		phys_section[index] = sect_index;
+		printk("set phys_section[%d]: %d\n", index, phys_section[index]);
+	}
 }
 
 void
@@ -99,6 +107,8 @@ memmap_init(unsigned long num_pages, int
 {
 	unsigned long offset;
 
+	printk("nonlinear memmap_init(%ld, %d, %ld, %ld)\n", num_pages, nid, zone, start_pfn);
+	dump_stack();
 	offset = section_offset_pfn(start_pfn);
 	while (num_pages) {
 		unsigned long npages;
@@ -167,6 +177,34 @@ page_to_lpfn(struct page *page)
 	return section_to_pfn(page_section(page)) +
 		(page - mem_section[page_section(page)].mem_map);
 }
+
+extern struct page *__kmalloc_section_memmap(unsigned long nr_pages);
+/* Free hotplug-allocated memmaps and unhook every section in the pfn range. */
+int invalidate_phys_mapping(unsigned long pfn, unsigned long nr_pages)
+{
+	unsigned int i, section_nr = pfn_to_section(pfn);
+	unsigned int end_section = section_nr + (nr_pages >> SECTION_SHIFT);
+
+	/* pfn 0 has no predecessor (pfn-1 would wrap), so it is always aligned */
+	if (pfn && pfn_to_section(pfn) == pfn_to_section(pfn-1)) {
+		printk("%s() start pfn: %ld not section-aligned\n", __func__, pfn);
+		return -EINVAL;
+	}
+	for (i = section_nr; i < end_section; i++) {	/* end index, not a count */
+		struct mem_section *ms = &mem_section[phys_section[i]];
+		/* tag is on the page backing the memmap (assumes mem_map is its kaddr) */
+		struct page *page = ms->mem_map ? virt_to_page(ms->mem_map) : NULL;
+		if (page && (void *)page->mapping == (void *)__kmalloc_section_memmap) {
+			page->mapping = NULL;	/* drop the tag before freeing */
+			__free_pages(page, get_order(sizeof(struct page) * nr_pages));
+		}
+		ms->mem_map = NULL;
+		phys_section[i] = INVALID_SECTION;
+		ms->phys_section = INVALID_PHYS_SECTION;
+	}
+	return 0;
+}
+
 EXPORT_SYMBOL(pfn_to_page);
 EXPORT_SYMBOL(page_to_pfn);
 
diff -puN mm/bootmem.c~P-i386-debug mm/bootmem.c
diff -puN include/linux/rmap.h~P-i386-debug include/linux/rmap.h
diff -puN include/linux/nonlinear.h~P-i386-debug include/linux/nonlinear.h
diff -puN include/linux/memory_hotplug.h~P-i386-debug include/linux/memory_hotplug.h
diff -puN drivers/base/memory.c~P-i386-debug drivers/base/memory.c
--- memhotplug/drivers/base/memory.c~P-i386-debug	2004-08-20 11:28:24.000000000 -0700
+++ memhotplug-dave/drivers/base/memory.c	2004-08-20 11:28:24.000000000 -0700
@@ -129,7 +129,7 @@ static int
 memory_block_action(struct memory_block *mem, unsigned long action)
 {
 	int i;
-	unsigned long section;
+	unsigned long psection;
 	unsigned long start_lpfn, start_paddr;
 	struct page *first_page;
 	int ret;
@@ -138,20 +138,20 @@ memory_block_action(struct memory_block 
 	 * this eventually needs to be a loop so that a memory_block
 	 * can contain more than a single section
 	 */
-	section = mem->phys_index; //pfn_to_section()??
-	//len = mem->phys_length;
-	//These checks need to be performed, but we can't be accessing
-	//the structures directly.  use pfn
-	//if (mem_section[section].phys_section == INVALID_SECTION)
-	//	return -EINVAL;
-	//
 
-	first_page = pfn_to_page(mem->phys_index << SECTION_SHIFT);
+	psection = mem->phys_index; //pfn_to_section()??
+	first_page = pfn_to_page(section_to_pfn(psection));
+	printk("%s()\n\t"
+		"psection: %ld\n\t"
+	       "first_page: %p\n\t"
+	       "phys_index: %08lx\n",
+			__func__, psection, first_page, mem->phys_index);
+	printk("\tphys_section[%2ld]: %d\n", psection, phys_section[psection]);
 	for (i = 0; i < PAGES_PER_SECTION; i++) {
 		if ((action == MEM_ONLINE) && !PageReserved(first_page)) {
 			printk("%s: section number %ld page number %d "
 				"not reserved, was it already online? \n",
-				__func__, section, i);
+				__func__, psection, i);
 			return -EBUSY;
 		}
 	}
diff -puN arch/i386/mm/init.c~P-i386-debug arch/i386/mm/init.c
--- memhotplug/arch/i386/mm/init.c~P-i386-debug	2004-08-20 11:28:24.000000000 -0700
+++ memhotplug-dave/arch/i386/mm/init.c	2004-08-20 11:28:24.000000000 -0700
@@ -369,8 +369,9 @@ int remove_memory(u64 start, u64 size, u
 	 */
 	zone = page_zone(pfn_to_page(start_pfn));
 
-	printk("%s(): memory will be removed from "
-			"the %s zone\n", __func__, zone->name);
+	printk("%s(%016Lx, %016Lx, %ld): memory will be removed from "
+			"a %s zone\n", __func__, start, size, attr,
+			zone->name);
 
 	/*
 	 * not handling removing memory ranges that
diff -puN mm/vmscan.c~P-i386-debug mm/vmscan.c
_
