Commit 4bb0d3ec authored by Zachary Amsden, committed by Linus Torvalds

[PATCH] i386: inline asm cleanup

i386 Inline asm cleanup.  Use cr/dr accessor functions.

This also includes a potential bugfix.  In addition, some CR accessors really
should be volatile: reads from CR0 (numeric state may change in an exception
handler), writes to CR4 (flipping CR4.TSD) and reads from CR2 (page fault) must
not be re-ordered by the compiler.  I did not add a memory clobber to the
CR3 / CR4 / CR0 updates, as it was not there to begin with, and in no case
should kernel memory be clobbered, except when doing a TLB flush, which already
has a memory clobber.

I noticed that page invalidation does not have a memory clobber.  I can't find
a bug as a result, but there is definitely a potential for a bug here:

#define __flush_tlb_single(addr) \
	__asm__ __volatile__("invlpg %0": :"m" (*(char *) addr))
Signed-off-by: Zachary Amsden <zach@vmware.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
parent 2a0694d1
...@@ -642,12 +642,12 @@ void __devinit cpu_init(void) ...@@ -642,12 +642,12 @@ void __devinit cpu_init(void)
asm volatile ("xorl %eax, %eax; movl %eax, %fs; movl %eax, %gs"); asm volatile ("xorl %eax, %eax; movl %eax, %fs; movl %eax, %gs");
/* Clear all 6 debug registers: */ /* Clear all 6 debug registers: */
set_debugreg(0, 0);
#define CD(register) set_debugreg(0, register) set_debugreg(0, 1);
set_debugreg(0, 2);
CD(0); CD(1); CD(2); CD(3); /* no db4 and db5 */; CD(6); CD(7); set_debugreg(0, 3);
set_debugreg(0, 6);
#undef CD set_debugreg(0, 7);
/* /*
* Force FPU initialization: * Force FPU initialization:
......
...@@ -64,8 +64,6 @@ static int dont_scale_voltage; ...@@ -64,8 +64,6 @@ static int dont_scale_voltage;
#define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, "longhaul", msg) #define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, "longhaul", msg)
#define __hlt() __asm__ __volatile__("hlt": : :"memory")
/* Clock ratios multiplied by 10 */ /* Clock ratios multiplied by 10 */
static int clock_ratio[32]; static int clock_ratio[32];
static int eblcr_table[32]; static int eblcr_table[32];
...@@ -168,11 +166,9 @@ static void do_powersaver(union msr_longhaul *longhaul, ...@@ -168,11 +166,9 @@ static void do_powersaver(union msr_longhaul *longhaul,
outb(0xFE,0x21); /* TMR0 only */ outb(0xFE,0x21); /* TMR0 only */
outb(0xFF,0x80); /* delay */ outb(0xFF,0x80); /* delay */
local_irq_enable(); safe_halt();
__hlt();
wrmsrl(MSR_VIA_LONGHAUL, longhaul->val); wrmsrl(MSR_VIA_LONGHAUL, longhaul->val);
__hlt(); halt();
local_irq_disable(); local_irq_disable();
...@@ -251,9 +247,7 @@ static void longhaul_setstate(unsigned int clock_ratio_index) ...@@ -251,9 +247,7 @@ static void longhaul_setstate(unsigned int clock_ratio_index)
bcr2.bits.CLOCKMUL = clock_ratio_index; bcr2.bits.CLOCKMUL = clock_ratio_index;
local_irq_disable(); local_irq_disable();
wrmsrl (MSR_VIA_BCR2, bcr2.val); wrmsrl (MSR_VIA_BCR2, bcr2.val);
local_irq_enable(); safe_halt();
__hlt();
/* Disable software clock multiplier */ /* Disable software clock multiplier */
rdmsrl (MSR_VIA_BCR2, bcr2.val); rdmsrl (MSR_VIA_BCR2, bcr2.val);
......
...@@ -132,11 +132,7 @@ static void __init set_cx86_memwb(void) ...@@ -132,11 +132,7 @@ static void __init set_cx86_memwb(void)
setCx86(CX86_CCR2, getCx86(CX86_CCR2) & ~0x04); setCx86(CX86_CCR2, getCx86(CX86_CCR2) & ~0x04);
/* set 'Not Write-through' */ /* set 'Not Write-through' */
cr0 = 0x20000000; cr0 = 0x20000000;
__asm__("movl %%cr0,%%eax\n\t" write_cr0(read_cr0() | cr0);
"orl %0,%%eax\n\t"
"movl %%eax,%%cr0\n"
: : "r" (cr0)
:"ax");
/* CCR2 bit 2: lock NW bit and set WT1 */ /* CCR2 bit 2: lock NW bit and set WT1 */
setCx86(CX86_CCR2, getCx86(CX86_CCR2) | 0x14 ); setCx86(CX86_CCR2, getCx86(CX86_CCR2) | 0x14 );
} }
......
...@@ -79,7 +79,7 @@ static void efi_call_phys_prelog(void) ...@@ -79,7 +79,7 @@ static void efi_call_phys_prelog(void)
* directory. If I have PSE, I just need to duplicate one entry in * directory. If I have PSE, I just need to duplicate one entry in
* page directory. * page directory.
*/ */
__asm__ __volatile__("movl %%cr4, %0":"=r"(cr4)); cr4 = read_cr4();
if (cr4 & X86_CR4_PSE) { if (cr4 & X86_CR4_PSE) {
efi_bak_pg_dir_pointer[0].pgd = efi_bak_pg_dir_pointer[0].pgd =
...@@ -115,7 +115,7 @@ static void efi_call_phys_epilog(void) ...@@ -115,7 +115,7 @@ static void efi_call_phys_epilog(void)
cpu_gdt_descr[0].address = cpu_gdt_descr[0].address =
(unsigned long) __va(cpu_gdt_descr[0].address); (unsigned long) __va(cpu_gdt_descr[0].address);
__asm__ __volatile__("lgdt %0":"=m"(cpu_gdt_descr)); __asm__ __volatile__("lgdt %0":"=m"(cpu_gdt_descr));
__asm__ __volatile__("movl %%cr4, %0":"=r"(cr4)); cr4 = read_cr4();
if (cr4 & X86_CR4_PSE) { if (cr4 & X86_CR4_PSE) {
swapper_pg_dir[pgd_index(0)].pgd = swapper_pg_dir[pgd_index(0)].pgd =
......
...@@ -17,13 +17,7 @@ ...@@ -17,13 +17,7 @@
#include <asm/apic.h> #include <asm/apic.h>
#include <asm/cpufeature.h> #include <asm/cpufeature.h>
#include <asm/desc.h> #include <asm/desc.h>
#include <asm/system.h>
static inline unsigned long read_cr3(void)
{
unsigned long cr3;
asm volatile("movl %%cr3,%0": "=r"(cr3));
return cr3;
}
#define PAGE_ALIGNED __attribute__ ((__aligned__(PAGE_SIZE))) #define PAGE_ALIGNED __attribute__ ((__aligned__(PAGE_SIZE)))
......
...@@ -313,16 +313,12 @@ void show_regs(struct pt_regs * regs) ...@@ -313,16 +313,12 @@ void show_regs(struct pt_regs * regs)
printk(" DS: %04x ES: %04x\n", printk(" DS: %04x ES: %04x\n",
0xffff & regs->xds,0xffff & regs->xes); 0xffff & regs->xds,0xffff & regs->xes);
__asm__("movl %%cr0, %0": "=r" (cr0)); cr0 = read_cr0();
__asm__("movl %%cr2, %0": "=r" (cr2)); cr2 = read_cr2();
__asm__("movl %%cr3, %0": "=r" (cr3)); cr3 = read_cr3();
/* This could fault if %cr4 does not exist */ if (current_cpu_data.x86 > 4) {
__asm__("1: movl %%cr4, %0 \n" cr4 = read_cr4();
"2: \n" }
".section __ex_table,\"a\" \n"
".long 1b,2b \n"
".previous \n"
: "=r" (cr4): "0" (0));
printk("CR0: %08lx CR2: %08lx CR3: %08lx CR4: %08lx\n", cr0, cr2, cr3, cr4); printk("CR0: %08lx CR2: %08lx CR3: %08lx CR4: %08lx\n", cr0, cr2, cr3, cr4);
show_trace(NULL, &regs->esp); show_trace(NULL, &regs->esp);
} }
......
...@@ -576,7 +576,7 @@ static void stop_this_cpu (void * dummy) ...@@ -576,7 +576,7 @@ static void stop_this_cpu (void * dummy)
local_irq_disable(); local_irq_disable();
disable_local_APIC(); disable_local_APIC();
if (cpu_data[smp_processor_id()].hlt_works_ok) if (cpu_data[smp_processor_id()].hlt_works_ok)
for(;;) __asm__("hlt"); for(;;) halt();
for (;;); for (;;);
} }
......
...@@ -233,7 +233,7 @@ fastcall void do_page_fault(struct pt_regs *regs, unsigned long error_code) ...@@ -233,7 +233,7 @@ fastcall void do_page_fault(struct pt_regs *regs, unsigned long error_code)
int write, si_code; int write, si_code;
/* get the address */ /* get the address */
__asm__("movl %%cr2,%0":"=r" (address)); address = read_cr2();
if (notify_die(DIE_PAGE_FAULT, "page fault", regs, error_code, 14, if (notify_die(DIE_PAGE_FAULT, "page fault", regs, error_code, 14,
SIGSEGV) == NOTIFY_STOP) SIGSEGV) == NOTIFY_STOP)
...@@ -453,7 +453,7 @@ no_context: ...@@ -453,7 +453,7 @@ no_context:
printk(" at virtual address %08lx\n",address); printk(" at virtual address %08lx\n",address);
printk(KERN_ALERT " printing eip:\n"); printk(KERN_ALERT " printing eip:\n");
printk("%08lx\n", regs->eip); printk("%08lx\n", regs->eip);
asm("movl %%cr3,%0":"=r" (page)); page = read_cr3();
page = ((unsigned long *) __va(page))[address >> 22]; page = ((unsigned long *) __va(page))[address >> 22];
printk(KERN_ALERT "*pde = %08lx\n", page); printk(KERN_ALERT "*pde = %08lx\n", page);
/* /*
...@@ -526,7 +526,7 @@ vmalloc_fault: ...@@ -526,7 +526,7 @@ vmalloc_fault:
pmd_t *pmd, *pmd_k; pmd_t *pmd, *pmd_k;
pte_t *pte_k; pte_t *pte_k;
asm("movl %%cr3,%0":"=r" (pgd_paddr)); pgd_paddr = read_cr3();
pgd = index + (pgd_t *)__va(pgd_paddr); pgd = index + (pgd_t *)__va(pgd_paddr);
pgd_k = init_mm.pgd + index; pgd_k = init_mm.pgd + index;
......
...@@ -62,7 +62,7 @@ static void flush_kernel_map(void *dummy) ...@@ -62,7 +62,7 @@ static void flush_kernel_map(void *dummy)
{ {
/* Could use CLFLUSH here if the CPU supports it (Hammer,P4) */ /* Could use CLFLUSH here if the CPU supports it (Hammer,P4) */
if (boot_cpu_data.x86_model >= 4) if (boot_cpu_data.x86_model >= 4)
asm volatile("wbinvd":::"memory"); wbinvd();
/* Flush all to work around Errata in early athlons regarding /* Flush all to work around Errata in early athlons regarding
* large page flushing. * large page flushing.
*/ */
......
...@@ -57,10 +57,10 @@ void __save_processor_state(struct saved_context *ctxt) ...@@ -57,10 +57,10 @@ void __save_processor_state(struct saved_context *ctxt)
/* /*
* control registers * control registers
*/ */
asm volatile ("movl %%cr0, %0" : "=r" (ctxt->cr0)); ctxt->cr0 = read_cr0();
asm volatile ("movl %%cr2, %0" : "=r" (ctxt->cr2)); ctxt->cr2 = read_cr2();
asm volatile ("movl %%cr3, %0" : "=r" (ctxt->cr3)); ctxt->cr3 = read_cr3();
asm volatile ("movl %%cr4, %0" : "=r" (ctxt->cr4)); ctxt->cr4 = read_cr4();
} }
void save_processor_state(void) void save_processor_state(void)
...@@ -109,10 +109,10 @@ void __restore_processor_state(struct saved_context *ctxt) ...@@ -109,10 +109,10 @@ void __restore_processor_state(struct saved_context *ctxt)
/* /*
* control registers * control registers
*/ */
/* Put each saved value back into the register it came from: CR4, CR3, CR2, then CR0. */
asm volatile ("movl %0, %%cr4" :: "r" (ctxt->cr4)); write_cr4(ctxt->cr4);
asm volatile ("movl %0, %%cr3" :: "r" (ctxt->cr3)); write_cr3(ctxt->cr3);
asm volatile ("movl %0, %%cr2" :: "r" (ctxt->cr2)); write_cr2(ctxt->cr2);
asm volatile ("movl %0, %%cr0" :: "r" (ctxt->cr0)); write_cr0(ctxt->cr0);
/* /*
* now restore the descriptor tables to their proper values * now restore the descriptor tables to their proper values
......
...@@ -19,7 +19,7 @@ int unmap_page_from_agp(struct page *page); ...@@ -19,7 +19,7 @@ int unmap_page_from_agp(struct page *page);
/* Could use CLFLUSH here if the cpu supports it. But then it would /* Could use CLFLUSH here if the cpu supports it. But then it would
need to be called for each cacheline of the whole page so it may not be need to be called for each cacheline of the whole page so it may not be
worth it. Would need a page for it. */ worth it. Would need a page for it. */
#define flush_agp_cache() asm volatile("wbinvd":::"memory") #define flush_agp_cache() wbinvd()
/* Convert a physical address to an address suitable for the GART. */ /* Convert a physical address to an address suitable for the GART. */
#define phys_to_gart(x) (x) #define phys_to_gart(x) (x)
......
...@@ -118,7 +118,10 @@ static void __init check_hlt(void) ...@@ -118,7 +118,10 @@ static void __init check_hlt(void)
printk("disabled\n"); printk("disabled\n");
return; return;
} }
__asm__ __volatile__("hlt ; hlt ; hlt ; hlt"); halt();
halt();
halt();
halt();
printk("OK.\n"); printk("OK.\n");
} }
......
...@@ -203,9 +203,7 @@ static inline unsigned int cpuid_edx(unsigned int op) ...@@ -203,9 +203,7 @@ static inline unsigned int cpuid_edx(unsigned int op)
return edx; return edx;
} }
#define load_cr3(pgdir) \ #define load_cr3(pgdir) write_cr3(__pa(pgdir))
asm volatile("movl %0,%%cr3": :"r" (__pa(pgdir)))
/* /*
* Intel CPU features in CR4 * Intel CPU features in CR4
...@@ -232,22 +230,20 @@ extern unsigned long mmu_cr4_features; ...@@ -232,22 +230,20 @@ extern unsigned long mmu_cr4_features;
static inline void set_in_cr4 (unsigned long mask) static inline void set_in_cr4 (unsigned long mask)
{ {
unsigned cr4;
mmu_cr4_features |= mask; mmu_cr4_features |= mask;
__asm__("movl %%cr4,%%eax\n\t" cr4 = read_cr4();
"orl %0,%%eax\n\t" cr4 |= mask;
"movl %%eax,%%cr4\n" write_cr4(cr4);
: : "irg" (mask)
:"ax");
} }
static inline void clear_in_cr4 (unsigned long mask) static inline void clear_in_cr4 (unsigned long mask)
{ {
unsigned cr4;
mmu_cr4_features &= ~mask; mmu_cr4_features &= ~mask;
__asm__("movl %%cr4,%%eax\n\t" cr4 = read_cr4();
"andl %0,%%eax\n\t" cr4 &= ~mask;
"movl %%eax,%%cr4\n" write_cr4(cr4);
: : "irg" (~mask)
:"ax");
} }
/* /*
......
...@@ -107,13 +107,33 @@ static inline unsigned long _get_base(char * addr) ...@@ -107,13 +107,33 @@ static inline unsigned long _get_base(char * addr)
#define clts() __asm__ __volatile__ ("clts") #define clts() __asm__ __volatile__ ("clts")
#define read_cr0() ({ \ #define read_cr0() ({ \
unsigned int __dummy; \ unsigned int __dummy; \
__asm__( \ __asm__ __volatile__( \
"movl %%cr0,%0\n\t" \ "movl %%cr0,%0\n\t" \
:"=r" (__dummy)); \ :"=r" (__dummy)); \
__dummy; \ __dummy; \
}) })
#define write_cr0(x) \ #define write_cr0(x) \
__asm__("movl %0,%%cr0": :"r" (x)); __asm__ __volatile__("movl %0,%%cr0": :"r" (x));
/*
 * read_cr2() - evaluate to the current contents of control register CR2.
 *
 * Statement expression: CR2 is moved into an unsigned int temporary, and
 * that temporary is the value of the macro.  The asm is marked
 * __volatile__ so the compiler does not discard the read or merge
 * repeated reads into one.
 */
#define read_cr2() ({ \
unsigned int __dummy; \
__asm__ __volatile__( \
"movl %%cr2,%0\n\t" \
:"=r" (__dummy)); \
__dummy; \
})
/*
 * write_cr2(x) - load x into control register CR2.
 *
 * x is passed in a general register ("r" constraint).  The expansion
 * already ends in ';', so the usual `write_cr2(v);` call leaves an extra
 * empty statement; do not use it as the unbraced body of an `if` that
 * has an `else`.
 */
#define write_cr2(x) \
__asm__ __volatile__("movl %0,%%cr2": :"r" (x));
/*
 * read_cr3() - evaluate to the current contents of control register CR3.
 *
 * Statement expression yielding an unsigned int.  Unlike read_cr2(), this
 * asm is NOT marked __volatile__, so the compiler is free to drop a read
 * whose result is unused or to combine identical reads.
 */
#define read_cr3() ({ \
unsigned int __dummy; \
__asm__ ( \
"movl %%cr3,%0\n\t" \
:"=r" (__dummy)); \
__dummy; \
})
/*
 * write_cr3(x) - load x into control register CR3.
 *
 * x is passed in a general register ("r" constraint); the asm is volatile
 * and has no "memory" clobber.  The expansion already ends in ';', so
 * `write_cr3(v);` leaves an extra empty statement at the call site.
 */
#define write_cr3(x) \
__asm__ __volatile__("movl %0,%%cr3": :"r" (x));
#define read_cr4() ({ \ #define read_cr4() ({ \
unsigned int __dummy; \ unsigned int __dummy; \
...@@ -123,7 +143,7 @@ static inline unsigned long _get_base(char * addr) ...@@ -123,7 +143,7 @@ static inline unsigned long _get_base(char * addr)
__dummy; \ __dummy; \
}) })
#define write_cr4(x) \ #define write_cr4(x) \
__asm__("movl %0,%%cr4": :"r" (x)); __asm__ __volatile__("movl %0,%%cr4": :"r" (x));
#define stts() write_cr0(8 | read_cr0()) #define stts() write_cr0(8 | read_cr0())
#endif /* __KERNEL__ */ #endif /* __KERNEL__ */
...@@ -447,6 +467,8 @@ struct alt_instr { ...@@ -447,6 +467,8 @@ struct alt_instr {
#define local_irq_enable() __asm__ __volatile__("sti": : :"memory") #define local_irq_enable() __asm__ __volatile__("sti": : :"memory")
/* used in the idle loop; sti takes one instruction cycle to complete */ /* used in the idle loop; sti takes one instruction cycle to complete */
#define safe_halt() __asm__ __volatile__("sti; hlt": : :"memory") #define safe_halt() __asm__ __volatile__("sti; hlt": : :"memory")
/*
 * halt() - execute a single "hlt" instruction.
 *
 * Used when interrupts are already enabled, or to shut down the processor.
 * Unlike safe_halt() it does not execute "sti" first, so it does not
 * enable interrupts itself.  The "memory" clobber makes the statement a
 * compiler barrier for memory accesses.
 */
#define halt() __asm__ __volatile__("hlt": : :"memory")
#define irqs_disabled() \ #define irqs_disabled() \
({ \ ({ \
......
...@@ -535,14 +535,14 @@ static struct xor_block_template xor_block_p5_mmx = { ...@@ -535,14 +535,14 @@ static struct xor_block_template xor_block_p5_mmx = {
#define XMMS_SAVE do { \ #define XMMS_SAVE do { \
preempt_disable(); \ preempt_disable(); \
cr0 = read_cr0(); \
clts(); \
__asm__ __volatile__ ( \ __asm__ __volatile__ ( \
"movl %%cr0,%0 ;\n\t" \ "movups %%xmm0,(%0) ;\n\t" \
"clts ;\n\t" \ "movups %%xmm1,0x10(%0) ;\n\t" \
"movups %%xmm0,(%1) ;\n\t" \ "movups %%xmm2,0x20(%0) ;\n\t" \
"movups %%xmm1,0x10(%1) ;\n\t" \ "movups %%xmm3,0x30(%0) ;\n\t" \
"movups %%xmm2,0x20(%1) ;\n\t" \ : \
"movups %%xmm3,0x30(%1) ;\n\t" \
: "=&r" (cr0) \
: "r" (xmm_save) \ : "r" (xmm_save) \
: "memory"); \ : "memory"); \
} while(0) } while(0)
...@@ -550,14 +550,14 @@ static struct xor_block_template xor_block_p5_mmx = { ...@@ -550,14 +550,14 @@ static struct xor_block_template xor_block_p5_mmx = {
#define XMMS_RESTORE do { \ #define XMMS_RESTORE do { \
__asm__ __volatile__ ( \ __asm__ __volatile__ ( \
"sfence ;\n\t" \ "sfence ;\n\t" \
"movups (%1),%%xmm0 ;\n\t" \ "movups (%0),%%xmm0 ;\n\t" \
"movups 0x10(%1),%%xmm1 ;\n\t" \ "movups 0x10(%0),%%xmm1 ;\n\t" \
"movups 0x20(%1),%%xmm2 ;\n\t" \ "movups 0x20(%0),%%xmm2 ;\n\t" \
"movups 0x30(%1),%%xmm3 ;\n\t" \ "movups 0x30(%0),%%xmm3 ;\n\t" \
"movl %0,%%cr0 ;\n\t" \
: \ : \
: "r" (cr0), "r" (xmm_save) \ : "r" (xmm_save) \
: "memory"); \ : "memory"); \
write_cr0(cr0); \
preempt_enable(); \ preempt_enable(); \
} while(0) } while(0)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment