Blob Blame History Raw
Default EFI to physical rather than virtual. Upstream seem to be going
in this direction.

diff --git a/arch/x86/include/asm/efi.h b/arch/x86/include/asm/efi.h
index 8e4a165..3c62f15 100644
--- a/arch/x86/include/asm/efi.h
+++ b/arch/x86/include/asm/efi.h
@@ -93,6 +93,9 @@ extern int add_efi_memmap;
 extern void efi_memblock_x86_reserve_range(void);
 extern void efi_call_phys_prelog(void);
 extern void efi_call_phys_epilog(void);
+extern void efi_call_phys_prelog_in_physmode(void);
+extern void efi_call_phys_epilog_in_physmode(void);
+extern void efi_pagetable_init(void);
 
 #ifndef CONFIG_EFI
 /*
diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c
index 0fe27d7..e1158b0 100644
--- a/arch/x86/platform/efi/efi.c
+++ b/arch/x86/platform/efi/efi.c
@@ -58,6 +58,7 @@ struct efi_memory_map memmap;
 
 static struct efi efi_phys __initdata;
 static efi_system_table_t efi_systab __initdata;
+static efi_runtime_services_t phys_runtime;
 
 static int __init setup_noefi(char *arg)
 {
@@ -172,7 +173,7 @@ static efi_status_t __init phys_efi_set_virtual_address_map(
 	return status;
 }
 
-static efi_status_t __init phys_efi_get_time(efi_time_t *tm,
+static efi_status_t __init phys_efi_get_time_early(efi_time_t *tm,
 					     efi_time_cap_t *tc)
 {
 	efi_status_t status;
@@ -183,6 +184,112 @@ static efi_status_t __init phys_efi_get_time(efi_time_t *tm,
 	return status;
 }
 
+static efi_status_t phys_efi_get_time(efi_time_t *tm,
+				      efi_time_cap_t *tc)
+{
+	efi_status_t status;
+
+	efi_call_phys_prelog_in_physmode();
+	status = efi_call_phys2((void*)phys_runtime.get_time, tm, tc);
+	efi_call_phys_epilog_in_physmode();
+	return status;
+}
+
+static efi_status_t __init phys_efi_set_time(efi_time_t *tm)
+{
+	efi_status_t status;
+
+	efi_call_phys_prelog_in_physmode();
+	status = efi_call_phys1((void*)phys_runtime.set_time, tm);
+	efi_call_phys_epilog_in_physmode();
+	return status;
+}
+
+static efi_status_t phys_efi_get_wakeup_time(efi_bool_t *enabled,
+                                             efi_bool_t *pending,
+                                             efi_time_t *tm)
+{
+	efi_status_t status;
+
+	efi_call_phys_prelog_in_physmode();
+	status = efi_call_phys3((void*)phys_runtime.get_wakeup_time, enabled,
+				pending, tm);
+	efi_call_phys_epilog_in_physmode();
+	return status;
+}
+
+static efi_status_t phys_efi_set_wakeup_time(efi_bool_t enabled, efi_time_t *tm)
+{
+	efi_status_t status;
+	efi_call_phys_prelog_in_physmode();
+	status = efi_call_phys2((void*)phys_runtime.set_wakeup_time, enabled,
+				tm);
+	efi_call_phys_epilog_in_physmode();
+	return status;
+}
+
+static efi_status_t phys_efi_get_variable(efi_char16_t *name,
+					  efi_guid_t *vendor,
+					  u32 *attr,
+					  unsigned long *data_size,
+					  void *data)
+{
+	efi_status_t status;
+	efi_call_phys_prelog_in_physmode();
+	status = efi_call_phys5((void*)phys_runtime.get_variable, name, vendor,
+				attr, data_size, data);
+	efi_call_phys_epilog_in_physmode();
+	return status;
+}
+
+static efi_status_t phys_efi_get_next_variable(unsigned long *name_size,
+					       efi_char16_t *name,
+					       efi_guid_t *vendor)
+{
+	efi_status_t status;
+
+	efi_call_phys_prelog_in_physmode();
+	status = efi_call_phys3((void*)phys_runtime.get_next_variable,
+				name_size, name, vendor);
+	efi_call_phys_epilog_in_physmode();
+	return status;
+}
+
+static efi_status_t phys_efi_set_variable(efi_char16_t *name,
+					  efi_guid_t *vendor,
+					  unsigned long attr,
+					  unsigned long data_size,
+					  void *data)
+{
+	efi_status_t status;
+	efi_call_phys_prelog_in_physmode();
+	status = efi_call_phys5((void*)phys_runtime.set_variable, name,
+				vendor, attr, data_size, data);
+	efi_call_phys_epilog_in_physmode();
+	return status;
+}
+
+static efi_status_t phys_efi_get_next_high_mono_count(u32 *count)
+{
+	efi_status_t status;
+	efi_call_phys_prelog_in_physmode();
+	status = efi_call_phys1((void*)phys_runtime.get_next_high_mono_count,
+				count);
+	efi_call_phys_epilog_in_physmode();
+	return status;
+}
+
+static void phys_efi_reset_system(int reset_type,
+                                  efi_status_t status,
+                                  unsigned long data_size,
+                                  efi_char16_t *data)
+{
+	efi_call_phys_prelog_in_physmode();
+	efi_call_phys4((void*)phys_runtime.reset_system, reset_type, status,
+				data_size, data);
+	efi_call_phys_epilog_in_physmode();
+}
+
 int efi_set_rtc_mmss(unsigned long nowtime)
 {
 	int real_seconds, real_minutes;
@@ -435,7 +542,9 @@ void __init efi_init(void)
 		 * Make efi_get_time can be called before entering
 		 * virtual mode.
 		 */
-		efi.get_time = phys_efi_get_time;
+		efi.get_time = phys_efi_get_time_early;
+
+		memcpy(&phys_runtime, runtime, sizeof(efi_runtime_services_t));
 	} else
 		printk(KERN_ERR "Could not map the EFI runtime service "
 		       "table!\n");
@@ -466,6 +575,14 @@ void __init efi_init(void)
 #if EFI_DEBUG
 	print_efi_memmap();
 #endif
+
+#ifndef CONFIG_X86_64
+	/*
+	 * Only x86_64 supports physical mode as of now. Use virtual mode
+	 * forcibly.
+	 */
+	usevirtefi = 1;
+#endif
 }
 
 static void __init runtime_code_page_mkexec(void)
@@ -579,6 +696,27 @@ void __init efi_enter_virtual_mode(void)
 	memmap.map = NULL;
 }
 
+void __init efi_setup_physical_mode(void)
+{
+#ifdef CONFIG_X86_64
+	efi_pagetable_init();
+#endif
+	efi.get_time = phys_efi_get_time;
+	efi.set_time = phys_efi_set_time;
+	efi.get_wakeup_time = phys_efi_get_wakeup_time;
+	efi.set_wakeup_time = phys_efi_set_wakeup_time;
+	efi.get_variable = phys_efi_get_variable;
+	efi.get_next_variable = phys_efi_get_next_variable;
+	efi.set_variable = phys_efi_set_variable;
+	efi.get_next_high_mono_count =
+		phys_efi_get_next_high_mono_count;
+	efi.reset_system = phys_efi_reset_system;
+	efi.set_virtual_address_map = NULL; /* Not needed */
+
+	early_iounmap(memmap.map, memmap.nr_map * memmap.desc_size);
+	memmap.map = NULL;
+}
+
 /*
  * Convenience functions to obtain memory types and attributes
  */
diff --git a/arch/x86/platform/efi/efi_32.c b/arch/x86/platform/efi/efi_32.c
index 5cab48e..90767b1 100644
--- a/arch/x86/platform/efi/efi_32.c
+++ b/arch/x86/platform/efi/efi_32.c
@@ -110,3 +110,7 @@ void efi_call_phys_epilog(void)
 
 	local_irq_restore(efi_rt_eflags);
 }
+
+void efi_call_phys_prelog_in_physmode(void) { /* Not supported */ }
+void efi_call_phys_epilog_in_physmode(void) { /* Not supported */ }
+
diff --git a/arch/x86/platform/efi/efi_64.c b/arch/x86/platform/efi/efi_64.c
index ac0621a..ad19fe9 100644
--- a/arch/x86/platform/efi/efi_64.c
+++ b/arch/x86/platform/efi/efi_64.c
@@ -39,7 +39,9 @@
 #include <asm/fixmap.h>
 
 static pgd_t save_pgd __initdata;
-static unsigned long efi_flags __initdata;
+static DEFINE_PER_CPU(unsigned long, efi_flags);
+static DEFINE_PER_CPU(unsigned long, save_cr3);
+static pgd_t efi_pgd[PTRS_PER_PGD] __page_aligned_bss;
 
 static void __init early_mapping_set_exec(unsigned long start,
 					  unsigned long end,
@@ -80,7 +82,7 @@ void __init efi_call_phys_prelog(void)
 	unsigned long vaddress;
 
 	early_runtime_code_mapping_set_exec(1);
-	local_irq_save(efi_flags);
+	local_irq_save(get_cpu_var(efi_flags));
 	vaddress = (unsigned long)__va(0x0UL);
 	save_pgd = *pgd_offset_k(0x0UL);
 	set_pgd(pgd_offset_k(0x0UL), *pgd_offset_k(vaddress));
@@ -94,10 +96,23 @@ void __init efi_call_phys_epilog(void)
 	 */
 	set_pgd(pgd_offset_k(0x0UL), save_pgd);
 	__flush_tlb_all();
-	local_irq_restore(efi_flags);
+	local_irq_restore(get_cpu_var(efi_flags));
 	early_runtime_code_mapping_set_exec(0);
 }
 
+void efi_call_phys_prelog_in_physmode(void)
+{
+	local_irq_save(get_cpu_var(efi_flags));
+	get_cpu_var(save_cr3)= read_cr3();
+	write_cr3(virt_to_phys(efi_pgd));
+}
+
+void efi_call_phys_epilog_in_physmode(void)
+{
+	write_cr3(get_cpu_var(save_cr3));
+	local_irq_restore(get_cpu_var(efi_flags));
+}
+
 void __iomem *__init efi_ioremap(unsigned long phys_addr, unsigned long size,
 				 u32 type)
 {
@@ -112,3 +127,78 @@ void __iomem *__init efi_ioremap(unsigned long phys_addr, unsigned long size,
 
 	return (void __iomem *)__va(phys_addr);
 }
+
+static pud_t *fill_pud(pgd_t *pgd, unsigned long vaddr)
+{
+	if (pgd_none(*pgd)) {
+		pud_t *pud = (pud_t *)get_zeroed_page(GFP_ATOMIC);
+		set_pgd(pgd, __pgd(_PAGE_TABLE | __pa(pud)));
+		if (pud != pud_offset(pgd, 0))
+			printk(KERN_ERR "EFI PAGETABLE BUG #00! %p <-> %p\n",
+			       pud, pud_offset(pgd, 0));
+	}
+	return pud_offset(pgd, vaddr);
+}
+
+static pmd_t *fill_pmd(pud_t *pud, unsigned long vaddr)
+{
+	if (pud_none(*pud)) {
+		pmd_t *pmd = (pmd_t *)get_zeroed_page(GFP_ATOMIC);
+		set_pud(pud, __pud(_PAGE_TABLE | __pa(pmd)));
+		if (pmd != pmd_offset(pud, 0))
+			printk(KERN_ERR "EFI PAGETABLE BUG #01! %p <-> %p\n",
+			       pmd, pmd_offset(pud, 0));
+	}
+	return pmd_offset(pud, vaddr);
+}
+
+static pte_t *fill_pte(pmd_t *pmd, unsigned long vaddr)
+{
+	if (pmd_none(*pmd)) {
+		pte_t *pte = (pte_t *)get_zeroed_page(GFP_ATOMIC);
+		set_pmd(pmd, __pmd(_PAGE_TABLE | __pa(pte)));
+		if (pte != pte_offset_kernel(pmd, 0))
+			printk(KERN_ERR "EFI PAGETABLE BUG #02!\n");
+	}
+	return pte_offset_kernel(pmd, vaddr);
+}
+
+void __init efi_pagetable_init(void)
+{
+	efi_memory_desc_t *md;
+	unsigned long size;
+	u64 start_pfn, end_pfn, pfn, vaddr;
+	void *p;
+	pgd_t *pgd;
+	pud_t *pud;
+	pmd_t *pmd;
+	pte_t *pte;
+
+	memset(efi_pgd, 0, sizeof(efi_pgd));
+	for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) {
+		md = p;
+		if (!(md->type & EFI_RUNTIME_SERVICES_CODE) &&
+		    !(md->type & EFI_RUNTIME_SERVICES_DATA))
+			continue;
+
+		start_pfn = md->phys_addr >> PAGE_SHIFT;
+		size = md->num_pages << EFI_PAGE_SHIFT;
+		end_pfn = PFN_UP(md->phys_addr + size);
+
+		for (pfn = start_pfn; pfn <= end_pfn; pfn++) {
+			vaddr = pfn << PAGE_SHIFT;
+			pgd = efi_pgd + pgd_index(vaddr);
+			pud = fill_pud(pgd, vaddr);
+			pmd = fill_pmd(pud, vaddr);
+			pte = fill_pte(pmd, vaddr);
+			if (md->type & EFI_RUNTIME_SERVICES_CODE)
+				set_pte(pte, pfn_pte(pfn, PAGE_KERNEL_EXEC));
+			else
+				set_pte(pte, pfn_pte(pfn, PAGE_KERNEL));
+		}
+	}
+	pgd = efi_pgd + pgd_index(PAGE_OFFSET);
+	set_pgd(pgd, *pgd_offset_k(PAGE_OFFSET));
+	pgd = efi_pgd + pgd_index(__START_KERNEL_map);
+	set_pgd(pgd, *pgd_offset_k(__START_KERNEL_map));
+}
diff --git a/include/linux/efi.h b/include/linux/efi.h
index fb737bc..c4e310e 100644
--- a/include/linux/efi.h
+++ b/include/linux/efi.h
@@ -290,6 +290,7 @@ extern void efi_map_pal_code (void);
 extern void efi_memmap_walk (efi_freemem_callback_t callback, void *arg);
 extern void efi_gettimeofday (struct timespec *ts);
 extern void efi_enter_virtual_mode (void);	/* switch EFI to virtual mode, if possible */
+extern void efi_setup_physical_mode(void);
 extern u64 efi_get_iobase (void);
 extern u32 efi_mem_type (unsigned long phys_addr);
 extern u64 efi_mem_attributes (unsigned long phys_addr);
diff --git a/include/linux/init.h b/include/linux/init.h
index 577671c..2f1b28f 100644
--- a/include/linux/init.h
+++ b/include/linux/init.h
@@ -149,6 +149,7 @@ extern int do_one_initcall(initcall_t fn);
 extern char __initdata boot_command_line[];
 extern char *saved_command_line;
 extern unsigned int reset_devices;
+extern unsigned int usevirtefi;
 
 /* used by init/main.c */
 void setup_arch(char **);
diff --git a/init/main.c b/init/main.c
index 8646401..726025e 100644
--- a/init/main.c
+++ b/init/main.c
@@ -196,6 +196,14 @@ static int __init set_reset_devices(char *str)
 
 __setup("reset_devices", set_reset_devices);
 
+unsigned int usevirtefi;
+static int __init set_virt_efi(char *str)
+{
+	usevirtefi = 1;
+	return 1;
+}
+__setup("virtefi", set_virt_efi);
+
 static const char * argv_init[MAX_INIT_ARGS+2] = { "init", NULL, };
 const char * envp_init[MAX_INIT_ENVS+2] = { "HOME=/", "TERM=linux", NULL, };
 static const char *panic_later, *panic_param;
@@ -668,8 +676,12 @@ asmlinkage void __init start_kernel(void)
 	pidmap_init();
 	anon_vma_init();
 #ifdef CONFIG_X86
-	if (efi_enabled)
-		efi_enter_virtual_mode();
+	if (efi_enabled) {
+		if (usevirtefi)
+			efi_enter_virtual_mode();
+		else
+			efi_setup_physical_mode();
+	}
 #endif
 	thread_info_cache_init();
 	cred_init();