Commit a989705c authored by Linus Torvalds's avatar Linus Torvalds

Merge git://git.kernel.org/pub/scm/linux/kernel/git/aegl/linux-2.6

* git://git.kernel.org/pub/scm/linux/kernel/git/aegl/linux-2.6:
  [IA64] update memory attribute aliasing documentation & test cases
  [IA64] fail mmaps that span areas with incompatible attributes
  [IA64] allow WB /sys/.../legacy_mem mmaps
  [IA64] make ioremap avoid unsupported attributes
  [IA64] rename ioremap variables to match i386
  [IA64] relax per-cpu TLB requirement to DTC
  [IA64] remove per-cpu ia64_phys_stacked_size_p8
  [IA64] Fix example error injection program
  [IA64] Itanium MC Error Injection Tool: pal_mc_error_inject() interface
  [IA64] Itanium MC Error Injection Tool: Makefile changes
  [IA64] Itanium MC Error Injection Tool: Driver sysfs interface
  [IA64] Itanium MC Error Injection Tool: Doc and sample application
  [IA64] Itanium MC Error Injection Tool: Kernel configuration
parents 2d56d3c4 d2918253
/*
* Exercise /dev/mem mmap cases that have been troublesome in the past
*
* (c) Copyright 2007 Hewlett-Packard Development Company, L.P.
* Bjorn Helgaas <bjorn.helgaas@hp.com>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*/
#include <stdlib.h>
#include <stdio.h>
#include <sys/types.h>
#include <dirent.h>
#include <fcntl.h>
#include <fnmatch.h>
#include <string.h>
#include <sys/mman.h>
#include <sys/stat.h>
#include <unistd.h>
int sum;
int map_mem(char *path, off_t offset, size_t length, int touch)
{
int fd, rc;
void *addr;
int *c;
fd = open(path, O_RDWR);
if (fd == -1) {
perror(path);
return -1;
}
addr = mmap(NULL, length, PROT_READ|PROT_WRITE, MAP_SHARED, fd, offset);
if (addr == MAP_FAILED)
return 1;
if (touch) {
c = (int *) addr;
while (c < (int *) (offset + length))
sum += *c++;
}
rc = munmap(addr, length);
if (rc == -1) {
perror("munmap");
return -1;
}
close(fd);
return 0;
}
int scan_sysfs(char *path, char *file, off_t offset, size_t length, int touch)
{
struct dirent **namelist;
char *name, *path2;
int i, n, r, rc, result = 0;
struct stat buf;
n = scandir(path, &namelist, 0, alphasort);
if (n < 0) {
perror("scandir");
return -1;
}
for (i = 0; i < n; i++) {
name = namelist[i]->d_name;
if (fnmatch(".", name, 0) == 0)
goto skip;
if (fnmatch("..", name, 0) == 0)
goto skip;
path2 = malloc(strlen(path) + strlen(name) + 3);
strcpy(path2, path);
strcat(path2, "/");
strcat(path2, name);
if (fnmatch(file, name, 0) == 0) {
rc = map_mem(path2, offset, length, touch);
if (rc == 0)
fprintf(stderr, "PASS: %s 0x%lx-0x%lx is %s\n", path2, offset, offset + length, touch ? "readable" : "mappable");
else if (rc > 0)
fprintf(stderr, "PASS: %s 0x%lx-0x%lx not mappable\n", path2, offset, offset + length);
else {
fprintf(stderr, "FAIL: %s 0x%lx-0x%lx not accessible\n", path2, offset, offset + length);
return rc;
}
} else {
r = lstat(path2, &buf);
if (r == 0 && S_ISDIR(buf.st_mode)) {
rc = scan_sysfs(path2, file, offset, length, touch);
if (rc < 0)
return rc;
}
}
result |= rc;
free(path2);
skip:
free(namelist[i]);
}
free(namelist);
return rc;
}
char buf[1024];
int read_rom(char *path)
{
int fd, rc;
size_t size = 0;
fd = open(path, O_RDWR);
if (fd == -1) {
perror(path);
return -1;
}
rc = write(fd, "1", 2);
if (rc <= 0) {
perror("write");
return -1;
}
do {
rc = read(fd, buf, sizeof(buf));
if (rc > 0)
size += rc;
} while (rc > 0);
close(fd);
return size;
}
int scan_rom(char *path, char *file)
{
struct dirent **namelist;
char *name, *path2;
int i, n, r, rc, result = 0;
struct stat buf;
n = scandir(path, &namelist, 0, alphasort);
if (n < 0) {
perror("scandir");
return -1;
}
for (i = 0; i < n; i++) {
name = namelist[i]->d_name;
if (fnmatch(".", name, 0) == 0)
goto skip;
if (fnmatch("..", name, 0) == 0)
goto skip;
path2 = malloc(strlen(path) + strlen(name) + 3);
strcpy(path2, path);
strcat(path2, "/");
strcat(path2, name);
if (fnmatch(file, name, 0) == 0) {
rc = read_rom(path2);
/*
* It's OK if the ROM is unreadable. Maybe there
* is no ROM, or some other error ocurred. The
* important thing is that no MCA happened.
*/
if (rc > 0)
fprintf(stderr, "PASS: %s read %ld bytes\n", path2, rc);
else {
fprintf(stderr, "PASS: %s not readable\n", path2);
return rc;
}
} else {
r = lstat(path2, &buf);
if (r == 0 && S_ISDIR(buf.st_mode)) {
rc = scan_rom(path2, file);
if (rc < 0)
return rc;
}
}
result |= rc;
free(path2);
skip:
free(namelist[i]);
}
free(namelist);
return rc;
}
main()
{
int rc;
if (map_mem("/dev/mem", 0, 0xA0000, 1) == 0)
fprintf(stderr, "PASS: /dev/mem 0x0-0xa0000 is readable\n");
else
fprintf(stderr, "FAIL: /dev/mem 0x0-0xa0000 not accessible\n");
/*
* It's not safe to blindly read the VGA frame buffer. If you know
* how to poke the card the right way, it should respond, but it's
* not safe in general. Many machines, e.g., Intel chipsets, cover
* up a non-responding card by just returning -1, but others will
* report the failure as a machine check.
*/
if (map_mem("/dev/mem", 0xA0000, 0x20000, 0) == 0)
fprintf(stderr, "PASS: /dev/mem 0xa0000-0xc0000 is mappable\n");
else
fprintf(stderr, "FAIL: /dev/mem 0xa0000-0xc0000 not accessible\n");
if (map_mem("/dev/mem", 0xC0000, 0x40000, 1) == 0)
fprintf(stderr, "PASS: /dev/mem 0xc0000-0x100000 is readable\n");
else
fprintf(stderr, "FAIL: /dev/mem 0xc0000-0x100000 not accessible\n");
/*
* Often you can map all the individual pieces above (0-0xA0000,
* 0xA0000-0xC0000, and 0xC0000-0x100000), but can't map the whole
* thing at once. This is because the individual pieces use different
* attributes, and there's no single attribute supported over the
* whole region.
*/
rc = map_mem("/dev/mem", 0, 1024*1024, 0);
if (rc == 0)
fprintf(stderr, "PASS: /dev/mem 0x0-0x100000 is mappable\n");
else if (rc > 0)
fprintf(stderr, "PASS: /dev/mem 0x0-0x100000 not mappable\n");
else
fprintf(stderr, "FAIL: /dev/mem 0x0-0x100000 not accessible\n");
scan_sysfs("/sys/class/pci_bus", "legacy_mem", 0, 0xA0000, 1);
scan_sysfs("/sys/class/pci_bus", "legacy_mem", 0xA0000, 0x20000, 0);
scan_sysfs("/sys/class/pci_bus", "legacy_mem", 0xC0000, 0x40000, 1);
scan_sysfs("/sys/class/pci_bus", "legacy_mem", 0, 1024*1024, 0);
scan_rom("/sys/devices", "rom");
}
......@@ -112,16 +112,6 @@ POTENTIAL ATTRIBUTE ALIASING CASES
The /dev/mem mmap constraints apply.
However, since this is for mapping legacy MMIO space, WB access
does not make sense. This matters on machines without legacy
VGA support: these machines may have WB memory for the entire
first megabyte (or even the entire first granule).
On these machines, we could mmap legacy_mem as WB, which would
be safe in terms of attribute aliasing, but X has no way of
knowing that it is accessing regular memory, not a frame buffer,
so the kernel should fail the mmap rather than doing it with WB.
read/write of /dev/mem
This uses copy_from_user(), which implicitly uses a kernel
......@@ -138,14 +128,20 @@ POTENTIAL ATTRIBUTE ALIASING CASES
ioremap()
This returns a kernel identity mapping for use inside the
kernel.
This returns a mapping for use inside the kernel.
If the region is in kern_memmap, we should use the attribute
specified there. Otherwise, if the EFI memory map reports that
the entire granule supports WB, we should use that (granules
that are partially reserved or occupied by firmware do not appear
in kern_memmap). Otherwise, we should use a UC mapping.
specified there.
If the EFI memory map reports that the entire granule supports
WB, we should use that (granules that are partially reserved
or occupied by firmware do not appear in kern_memmap).
If the granule contains non-WB memory, but we can cover the
region safely with kernel page table mappings, we can use
ioremap_page_range() as most other architectures do.
Failing all of the above, we have to fall back to a UC mapping.
PAST PROBLEM CASES
......@@ -158,7 +154,7 @@ PAST PROBLEM CASES
succeed. It may create either WB or UC user mappings, depending
on whether the region is in kern_memmap or the EFI memory map.
mmap of 0x0-0xA0000 /dev/mem by "hwinfo" on HP sx1000 with VGA enabled
mmap of 0x0-0x9FFFF /dev/mem by "hwinfo" on HP sx1000 with VGA enabled
See https://bugzilla.novell.com/show_bug.cgi?id=140858.
......@@ -171,28 +167,25 @@ PAST PROBLEM CASES
so it is safe to use WB mappings.
The kernel VGA driver may ioremap the VGA frame buffer at 0xA0000,
which will use a granule-sized UC mapping covering 0-0xFFFFF. This
granule covers some WB-only memory, but since UC is non-speculative,
the processor will never generate an uncacheable reference to the
WB-only areas unless the driver explicitly touches them.
which uses a granule-sized UC mapping. This granule will cover some
WB-only memory, but since UC is non-speculative, the processor will
never generate an uncacheable reference to the WB-only areas unless
the driver explicitly touches them.
mmap of 0x0-0xFFFFF legacy_mem by "X"
If the EFI memory map reports this entire range as WB, there
is no VGA MMIO hole, and the mmap should fail or be done with
a WB mapping.
If the EFI memory map reports that the entire range supports the
same attributes, we can allow the mmap (and we will prefer WB if
supported, as is the case with HP sx[12]000 machines with VGA
disabled).
There's no easy way for X to determine whether the 0xA0000-0xBFFFF
region is a frame buffer or just memory, so I think it's best to
just fail this mmap request rather than using a WB mapping. As
far as I know, there's no need to map legacy_mem with WB
mappings.
If EFI reports the range as partly WB and partly UC (as on sx[12]000
machines with VGA enabled), we must fail the mmap because there's no
safe attribute to use.
Otherwise, a UC mapping of the entire region is probably safe.
The VGA hole means the region will not be in kern_memmap. The
HP sx1000 chipset doesn't support UC access to the memory surrounding
the VGA hole, but X doesn't need that area anyway and should not
reference it.
If EFI reports some of the range but not all (as on Intel firmware
that doesn't report the VGA frame buffer at all), we should fail the
mmap and force the user to map just the specific region of interest.
mmap of 0xA0000-0xBFFFF legacy_mem by "X" on HP sx1000 with VGA disabled
......@@ -202,6 +195,16 @@ PAST PROBLEM CASES
This is a special case of the previous case, and the mmap should
fail for the same reason as above.
read of /sys/devices/.../rom
For VGA devices, this may cause an ioremap() of 0xC0000. This
used to be done with a UC mapping, because the VGA frame buffer
at 0xA0000 prevents use of a WB granule. The UC mapping causes
an MCA on HP sx[12]000 chipsets.
We should use WB page table mappings to avoid covering the VGA
frame buffer.
NOTES
[1] SDM rev 2.2, vol 2, sec 4.4.1.
......
This diff is collapsed.
......@@ -439,6 +439,16 @@ config IA64_PALINFO
To use this option, you have to ensure that the "/proc file system
support" (CONFIG_PROC_FS) is enabled, too.
config IA64_MC_ERR_INJECT
tristate "MC error injection support"
help
Selets whether support for MC error injection. By enabling the
support, kernel provide sysfs interface for user application to
call MC error injection PAL procedure to inject various errors.
This is a useful tool for MCA testing.
If you're unsure, do not select this option.
config SGI_SN
def_bool y if (IA64_SGI_SN2 || IA64_GENERIC)
......
......@@ -164,6 +164,7 @@ CONFIG_COMPAT=y
CONFIG_IA64_MCA_RECOVERY=y
CONFIG_PERFMON=y
CONFIG_IA64_PALINFO=y
# CONFIG_MC_ERR_INJECT is not set
CONFIG_SGI_SN=y
# CONFIG_IA64_ESI is not set
......
......@@ -34,6 +34,7 @@ obj-$(CONFIG_IA64_UNCACHED_ALLOCATOR) += uncached.o
obj-$(CONFIG_AUDIT) += audit.o
obj-$(CONFIG_PCI_MSI) += msi_ia64.o
mca_recovery-y += mca_drv.o mca_drv_asm.o
obj-$(CONFIG_IA64_MC_ERR_INJECT)+= err_inject.o
obj-$(CONFIG_IA64_ESI) += esi.o
ifneq ($(CONFIG_IA64_ESI),)
......
......@@ -660,6 +660,29 @@ efi_memory_descriptor (unsigned long phys_addr)
return NULL;
}
static int
efi_memmap_intersects (unsigned long phys_addr, unsigned long size)
{
void *efi_map_start, *efi_map_end, *p;
efi_memory_desc_t *md;
u64 efi_desc_size;
unsigned long end;
efi_map_start = __va(ia64_boot_param->efi_memmap);
efi_map_end = efi_map_start + ia64_boot_param->efi_memmap_size;
efi_desc_size = ia64_boot_param->efi_memdesc_size;
end = phys_addr + size;
for (p = efi_map_start; p < efi_map_end; p += efi_desc_size) {
md = p;
if (md->phys_addr < end && efi_md_end(md) > phys_addr)
return 1;
}
return 0;
}
u32
efi_mem_type (unsigned long phys_addr)
{
......@@ -766,11 +789,28 @@ valid_phys_addr_range (unsigned long phys_addr, unsigned long size)
int
valid_mmap_phys_addr_range (unsigned long pfn, unsigned long size)
{
unsigned long phys_addr = pfn << PAGE_SHIFT;
u64 attr;
attr = efi_mem_attribute(phys_addr, size);
/*
* MMIO regions are often missing from the EFI memory map.
* We must allow mmap of them for programs like X, so we
* currently can't do any useful validation.
* /dev/mem mmap uses normal user pages, so we don't need the entire
* granule, but the entire region we're mapping must support the same
* attribute.
*/
if (attr & EFI_MEMORY_WB || attr & EFI_MEMORY_UC)
return 1;
/*
* Intel firmware doesn't tell us about all the MMIO regions, so
* in general we have to allow mmap requests. But if EFI *does*
* tell us about anything inside this region, we should deny it.
* The user can always map a smaller region to avoid the overlap.
*/
if (efi_memmap_intersects(phys_addr, size))
return 0;
return 1;
}
......
......@@ -767,7 +767,7 @@ ENTRY(ia64_leave_syscall)
ld8.fill r15=[r3] // M0|1 restore r15
mov b6=r18 // I0 restore b6
addl r17=THIS_CPU(ia64_phys_stacked_size_p8),r0 // A
LOAD_PHYS_STACK_REG_SIZE(r17)
mov f9=f0 // F clear f9
(pKStk) br.cond.dpnt.many skip_rbs_switch // B
......@@ -775,7 +775,6 @@ ENTRY(ia64_leave_syscall)
shr.u r18=r19,16 // I0|1 get byte size of existing "dirty" partition
cover // B add current frame into dirty partition & set cr.ifs
;;
(pUStk) ld4 r17=[r17] // M0|1 r17 = cpu_data->phys_stacked_size_p8
mov r19=ar.bsp // M2 get new backing store pointer
mov f10=f0 // F clear f10
......@@ -953,9 +952,7 @@ GLOBAL_ENTRY(ia64_leave_kernel)
shr.u r18=r19,16 // get byte size of existing "dirty" partition
;;
mov r16=ar.bsp // get existing backing store pointer
addl r17=THIS_CPU(ia64_phys_stacked_size_p8),r0
;;
ld4 r17=[r17] // r17 = cpu_data->phys_stacked_size_p8
LOAD_PHYS_STACK_REG_SIZE(r17)
(pKStk) br.cond.dpnt skip_rbs_switch
/*
......
/*
* err_inject.c -
* 1.) Inject errors to a processor.
* 2.) Query error injection capabilities.
* This driver along with user space code can be acting as an error
* injection tool.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
* NON INFRINGEMENT. See the GNU General Public License for more
* details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*
* Written by: Fenghua Yu <fenghua.yu@intel.com>, Intel Corporation
* Copyright (C) 2006, Intel Corp. All rights reserved.
*
*/
#include <linux/sysdev.h>
#include <linux/init.h>
#include <linux/mm.h>
#include <linux/cpu.h>
#include <linux/module.h>
#define ERR_INJ_DEBUG
#define ERR_DATA_BUFFER_SIZE 3 // Three 8-byte;
#define define_one_ro(name) \
static SYSDEV_ATTR(name, 0444, show_##name, NULL)
#define define_one_rw(name) \
static SYSDEV_ATTR(name, 0644, show_##name, store_##name)
static u64 call_start[NR_CPUS];
static u64 phys_addr[NR_CPUS];
static u64 err_type_info[NR_CPUS];
static u64 err_struct_info[NR_CPUS];
static struct {
u64 data1;
u64 data2;
u64 data3;
} __attribute__((__aligned__(16))) err_data_buffer[NR_CPUS];
static s64 status[NR_CPUS];
static u64 capabilities[NR_CPUS];
static u64 resources[NR_CPUS];
#define show(name) \
static ssize_t \
show_##name(struct sys_device *dev, char *buf) \
{ \
u32 cpu=dev->id; \
return sprintf(buf, "%lx\n", name[cpu]); \
}
#define store(name) \
static ssize_t \
store_##name(struct sys_device *dev, const char *buf, size_t size) \
{ \
unsigned int cpu=dev->id; \
name[cpu] = simple_strtoull(buf, NULL, 16); \
return size; \
}
show(call_start)
/* It's user's responsibility to call the PAL procedure on a specific
* processor. The cpu number in driver is only used for storing data.
*/
static ssize_t
store_call_start(struct sys_device *dev, const char *buf, size_t size)
{
unsigned int cpu=dev->id;
unsigned long call_start = simple_strtoull(buf, NULL, 16);
#ifdef ERR_INJ_DEBUG
printk(KERN_DEBUG "pal_mc_err_inject for cpu%d:\n", cpu);
printk(KERN_DEBUG "err_type_info=%lx,\n", err_type_info[cpu]);
printk(KERN_DEBUG "err_struct_info=%lx,\n", err_struct_info[cpu]);
printk(KERN_DEBUG "err_data_buffer=%lx, %lx, %lx.\n",
err_data_buffer[cpu].data1,
err_data_buffer[cpu].data2,
err_data_buffer[cpu].data3);
#endif
switch (call_start) {
case 0: /* Do nothing. */
break;
case 1: /* Call pal_mc_error_inject in physical mode. */
status[cpu]=ia64_pal_mc_error_inject_phys(err_type_info[cpu],
err_struct_info[cpu],
ia64_tpa(&err_data_buffer[cpu]),
&capabilities[cpu],
&resources[cpu]);
break;
case 2: /* Call pal_mc_error_inject in virtual mode. */
status[cpu]=ia64_pal_mc_error_inject_virt(err_type_info[cpu],
err_struct_info[cpu],
ia64_tpa(&err_data_buffer[cpu]),
&capabilities[cpu],
&resources[cpu]);
break;
default:
status[cpu] = -EINVAL;
break;
}
#ifdef ERR_INJ_DEBUG
printk(KERN_DEBUG "Returns: status=%d,\n", (int)status[cpu]);
printk(KERN_DEBUG "capapbilities=%lx,\n", capabilities[cpu]);
printk(KERN_DEBUG "resources=%lx\n", resources[cpu]);
#endif
return size;
}
show(err_type_info)
store(err_type_info)
static ssize_t
show_virtual_to_phys(struct sys_device *dev, char *buf)
{
unsigned int cpu=dev->id;
return sprintf(buf, "%lx\n", phys_addr[cpu]);
}
static ssize_t
store_virtual_to_phys(struct sys_device *dev, const char *buf, size_t size)
{
unsigned int cpu=dev->id;
u64 virt_addr=simple_strtoull(buf, NULL, 16);
int ret;
ret = get_user_pages(current, current->mm, virt_addr,
1, VM_READ, 0, NULL, NULL);
if (ret<=0) {
#ifdef ERR_INJ_DEBUG
printk("Virtual address %lx is not existing.\n",virt_addr);
#endif
return -EINVAL;
}
phys_addr[cpu] = ia64_tpa(virt_addr);
return size;
}
show(err_struct_info)
store(err_struct_info)
static ssize_t
show_err_data_buffer(struct sys_device *dev, char *buf)
{
unsigned int cpu=dev->id;
return sprintf(buf, "%lx, %lx, %lx\n",
err_data_buffer[cpu].data1,
err_data_buffer[cpu].data2,
err_data_buffer[cpu].data3);
}
static ssize_t
store_err_data_buffer(struct sys_device *dev, const char *buf, size_t size)
{
unsigned int cpu=dev->id;
int ret;
#ifdef ERR_INJ_DEBUG
printk("write err_data_buffer=[%lx,%lx,%lx] on cpu%d\n",
err_data_buffer[cpu].data1,
err_data_buffer[cpu].data2,
err_data_buffer[cpu].data3,
cpu);
#endif
ret=sscanf(buf, "%lx, %lx, %lx",
&err_data_buffer[cpu].data1,
&err_data_buffer[cpu].data2,
&err_data_buffer[cpu].data3);
if (ret!=ERR_DATA_BUFFER_SIZE)
return -EINVAL;
return size;
}
show(status)
show(capabilities)
show(resources)
define_one_rw(call_start);
define_one_rw(err_type_info);
define_one_rw(err_struct_info);
define_one_rw(err_data_buffer);
define_one_rw(virtual_to_phys);
define_one_ro(status);
define_one_ro(capabilities);
define_one_ro(resources);
static struct attribute *default_attrs[] = {
&attr_call_start.attr,
&attr_virtual_to_phys.attr,
&attr_err_type_info.attr,
&attr_err_struct_info.attr,
&attr_err_data_buffer.attr,
&attr_status.attr,
&attr_capabilities.attr,
&attr_resources.attr,
NULL
};
static struct attribute_group err_inject_attr_group = {
.attrs = default_attrs,
.name = "err_inject"
};
/* Add/Remove err_inject interface for CPU device */
static int __cpuinit err_inject_add_dev(struct sys_device * sys_dev)
{
return sysfs_create_group(&sys_dev->kobj, &err_inject_attr_group);
}
static int __cpuinit err_inject_remove_dev(struct sys_device * sys_dev)
{
sysfs_remove_group(&sys_dev->kobj, &err_inject_attr_group);
return 0;
}
static int __cpuinit err_inject_cpu_callback(struct notifier_block *nfb,
unsigned long action, void *hcpu)
{
unsigned int cpu = (unsigned long)hcpu;
struct sys_device *sys_dev;
sys_dev = get_cpu_sysdev(cpu);
switch (action) {
case CPU_ONLINE:
err_inject_add_dev(sys_dev);
break;
case CPU_DEAD:
err_inject_remove_dev(sys_dev);
break;
}
return NOTIFY_OK;
}
static struct notifier_block __cpuinitdata err_inject_cpu_notifier =
{
.notifier_call = err_inject_cpu_callback,
};
static int __init
err_inject_init(void)
{
int i;
#ifdef ERR_INJ_DEBUG
printk(KERN_INFO "Enter error injection driver.\n");
#endif
for_each_online_cpu(i) {
err_inject_cpu_callback(&err_inject_cpu_notifier, CPU_ONLINE,
(void *)(long)i);
}
register_hotcpu_notifier(&err_inject_cpu_notifier);
return 0;
}
static void __exit
err_inject_exit(void)
{
int i;
struct sys_device *sys_dev;
#ifdef ERR_INJ_DEBUG
printk(KERN_INFO "Exit error injection driver.\n");
#endif
for_each_online_cpu(i) {
sys_dev = get_cpu_sysdev(i);
sysfs_remove_group(&sys_dev->kobj, &err_inject_attr_group);
}
unregister_hotcpu_notifier(&err_inject_cpu_notifier);
}
module_init(err_inject_init);
module_exit(err_inject_exit);
MODULE_AUTHOR("Fenghua Yu <fenghua.yu@intel.com>");
MODULE_DESCRIPTION("MC error injection kenrel sysfs interface");
MODULE_LICENSE("GPL");
......@@ -374,6 +374,7 @@ ENTRY(alt_dtlb_miss)
movl r19=(((1 << IA64_MAX_PHYS_BITS) - 1) & ~0xfff)
mov r21=cr.ipsr
mov r31=pr
mov r24=PERCPU_ADDR
;;
#ifdef CONFIG_DISABLE_VHPT
shr.u r22=r16,61 // get the region number into r21
......@@ -386,22 +387,30 @@ ENTRY(alt_dtlb_miss)
(p8) mov r29=b0 // save b0
(p8) br.cond.dptk dtlb_fault
#endif
cmp.ge p10,p11=r16,r24 // access to per_cpu_data?
tbit.z p12,p0=r16,61 // access to region 6?
mov r25=PERCPU_PAGE_SHIFT << 2
mov r26=PERCPU_PAGE_SIZE
nop.m 0
nop.b 0
;;
(p10) mov r19=IA64_KR(PER_CPU_DATA)
(p11) and r19=r19,r16 // clear non-ppn fields
extr.u r23=r21,IA64_PSR_CPL0_BIT,2 // extract psr.cpl
and r22=IA64_ISR_CODE_MASK,r20 // get the isr.code field
tbit.nz p6,p7=r20,IA64_ISR_SP_BIT // is speculation bit on?
shr.u r18=r16,57 // move address bit 61 to bit 4
and r19=r19,r16 // clear ed, reserved bits, and PTE control bits
tbit.nz p9,p0=r20,IA64_ISR_NA_BIT // is non-access bit on?
;;
andcm r18=0x10,r18 // bit 4=~address-bit(61)
(p10) sub r19=r19,r26
(p10) mov cr.itir=r25
cmp.ne p8,p0=r0,r23
(p9) cmp.eq.or.andcm p6,p7=IA64_ISR_CODE_LFETCH,r22 // check isr.code field
(p12) dep r17=-1,r17,4,1 // set ma=UC for region 6 addr
(p8) br.cond.spnt page_fault
dep r21=-1,r21,IA64_PSR_ED_BIT,1
or r19=r19,r17 // insert PTE control bits into r19
;;
or r19=r19,r18 // set bit 4 (uncached) if the access was to region 6
or r19=r19,r17 // insert PTE control bits into r19
(p6) mov cr.ipsr=r21
;;
(p7) itc.d r19 // insert the TLB entry
......
......@@ -101,14 +101,6 @@ ia64_do_tlb_purge:
;;
srlz.d
;;
// 2. Purge DTR for PERCPU data.
movl r16=PERCPU_ADDR
mov r18=PERCPU_PAGE_SHIFT<<2
;;
ptr.d r16,r18
;;
srlz.d
;;
// 3. Purge ITR for PAL code.
GET_THIS_PADDR(r2, ia64_mca_pal_base)
;;
......@@ -196,22 +188,6 @@ ia64_reload_tr:
srlz.i
srlz.d
;;
// 2. Reload DTR register for PERCPU data.
GET_THIS_PADDR(r2, ia64_mca_per_cpu_pte)
;;
movl r16=PERCPU_ADDR // vaddr
movl r18=PERCPU_PAGE_SHIFT<<2
;;
mov cr.itir=r18
mov cr.ifa=r16
;;
ld8 r18=[r2] // load per-CPU PTE
mov r16=IA64_TR_PERCPU_DATA;
;;
itr.d dtr[r16]=r18
;;
srlz.d
;;
// 3. Reload ITR for PAL code.
GET_THIS_PADDR(r2, ia64_mca_pal_pte)
;;
......
......@@ -195,3 +195,23 @@ ia64_patch_gate (void)
ia64_patch_vtop(START(vtop), END(vtop));
ia64_patch_mckinley_e9(START(mckinley_e9), END(mckinley_e9));
}
void ia64_patch_phys_stack_reg(unsigned long val)
{
s32 * offp = (s32 *) __start___phys_stack_reg_patchlist;
s32 * end = (s32 *) __end___phys_stack_reg_patchlist;
u64 ip, mask, imm;
/* see instruction format A4: adds r1 = imm13, r3 */
mask = (0x3fUL << 27) | (0x7f << 13);
imm = (((val >> 7) & 0x3f) << 27) | (val & 0x7f) << 13;
while (offp < end) {
ip = (u64) offp + *offp;
ia64_patch(ip, mask, imm);
ia64_fc(ip);
++offp;
}
ia64_sync_i();
ia64_srlz_i();
}
......@@ -75,7 +75,6 @@ extern void ia64_setup_printk_clock(void);
DEFINE_PER_CPU(struct cpuinfo_ia64, cpu_info);
DEFINE_PER_CPU(unsigned long, local_per_cpu_offset);
DEFINE_PER_CPU(unsigned long, ia64_phys_stacked_size_p8);
unsigned long ia64_cycles_per_usec;
struct ia64_boot_param *ia64_boot_param;
struct screen_info screen_info;
......@@ -869,6 +868,7 @@ void __cpuinit
cpu_init (void)
{
extern void __cpuinit ia64_mmu_init (void *);
static unsigned long max_num_phys_stacked = IA64_NUM_PHYS_STACK_REG;
unsigned long num_phys_stacked;
pal_vm_info_2_u_t vmi;
unsigned int max_ctx;
......@@ -982,7 +982,10 @@ cpu_init (void)
num_phys_stacked = 96;
}
/* size of physical stacked register partition plus 8 bytes: */
__get_cpu_var(ia64_phys_stacked_size_p8) = num_phys_stacked*8 + 8;
if (num_phys_stacked > max_num_phys_stacked) {
ia64_patch_phys_stack_reg(num_phys_stacked*8 + 8);
max_num_phys_stacked = num_phys_stacked;
}
platform_cpu_init();
pm_idle = default_idle;
}
......
......@@ -78,6 +78,13 @@ SECTIONS
__stop___mca_table = .;
}
.data.patch.phys_stack_reg : AT(ADDR(.data.patch.phys_stack_reg) - LOAD_OFFSET)
{
__start___phys_stack_reg_patchlist = .;
*(.data.patch.phys_stack_reg)
__end___phys_stack_reg_patchlist = .;
}
/* Global data */
_data = .;
......
......@@ -355,7 +355,7 @@ setup_gate (void)
void __devinit
ia64_mmu_init (void *my_cpu_data)
{
unsigned long psr, pta, impl_va_bits;
unsigned long pta, impl_va_bits;
extern void __devinit tlb_init (void);
#ifdef CONFIG_DISABLE_VHPT
......@@ -364,15 +364,6 @@ ia64_mmu_init (void *my_cpu_data)
# define VHPT_ENABLE_BIT 1
#endif
/* Pin mapping for percpu area into TLB */
psr = ia64_clear_ic();
ia64_itr(0x2, IA64_TR_PERCPU_DATA, PERCPU_ADDR,
pte_val(pfn_pte(__pa(my_cpu_data) >> PAGE_SHIFT, PAGE_KERNEL)),
PERCPU_PAGE_SHIFT);
ia64_set_psr(psr);
ia64_srlz_i();
/*
* Check if the virtually mapped linear page table (VMLPT) overlaps with a mapped
* address space. The IA-64 architecture guarantees that at least 50 bits of
......
/*
* (c) Copyright 2006 Hewlett-Packard Development Company, L.P.
* (c) Copyright 2006, 2007 Hewlett-Packard Development Company, L.P.
* Bjorn Helgaas <bjorn.helgaas@hp.com>
*
* This program is free software; you can redistribute it and/or modify
......@@ -10,51 +10,101 @@
#include <linux/compiler.h>
#include <linux/module.h>
#include <linux/efi.h>
#include <linux/io.h>
#include <linux/vmalloc.h>
#include <asm/io.h>
#include <asm/meminit.h>
static inline void __iomem *
__ioremap (unsigned long offset, unsigned long size)
__ioremap (unsigned long phys_addr)
{
return (void __iomem *) (__IA64_UNCACHED_OFFSET | offset);
return (void __iomem *) (__IA64_UNCACHED_OFFSET | phys_addr);
}
void __iomem *
ioremap (unsigned long offset, unsigned long size)
ioremap (unsigned long phys_addr, unsigned long size)
{
void __iomem *addr;
struct vm_struct *area;
unsigned long offset;
pgprot_t prot;
u64 attr;
unsigned long gran_base, gran_size;
unsigned long page_base;
/*
* For things in kern_memmap, we must use the same attribute
* as the rest of the kernel. For more details, see
* Documentation/ia64/aliasing.txt.
*/
attr = kern_mem_attribute(offset, size);
attr = kern_mem_attribute(phys_addr, size);
if (attr & EFI_MEMORY_WB)
return (void __iomem *) phys_to_virt(offset);
return (void __iomem *) phys_to_virt(phys_addr);
else if (attr & EFI_MEMORY_UC)
return __ioremap(offset, size);
return __ioremap(phys_addr);
/*
* Some chipsets don't support UC access to memory. If
* WB is supported for the whole granule, we prefer that.
*/
gran_base = GRANULEROUNDDOWN(offset);
gran_size = GRANULEROUNDUP(offset + size) - gran_base;
gran_base = GRANULEROUNDDOWN(phys_addr);
gran_size = GRANULEROUNDUP(phys_addr + size) - gran_base;
if (efi_mem_attribute(gran_base, gran_size) & EFI_MEMORY_WB)
return (void __iomem *) phys_to_virt(offset);
return (void __iomem *) phys_to_virt(phys_addr);
return __ioremap(offset, size);
/*
* WB is not supported for the whole granule, so we can't use
* the region 7 identity mapping. If we can safely cover the
* area with kernel page table mappings, we can use those
* instead.
*/
page_base = phys_addr & PAGE_MASK;
size = PAGE_ALIGN(phys_addr + size) - page_base;
if (efi_mem_attribute(page_base, size) & EFI_MEMORY_WB) {
prot = PAGE_KERNEL;
/*
* Mappings have to be page-aligned
*/
offset = phys_addr & ~PAGE_MASK;
phys_addr &= PAGE_MASK;
/*
* Ok, go for it..
*/
area = get_vm_area(size, VM_IOREMAP);
if (!area)
return NULL;
area->phys_addr = phys_addr;
addr = (void __iomem *) area->addr;
if (ioremap_page_range((unsigned long) addr,
(unsigned long) addr + size, phys_addr, prot)) {
vunmap((void __force *) addr);
return NULL;
}
return (void __iomem *) (offset + (char __iomem *)addr);
}
return __ioremap(phys_addr);
}
EXPORT_SYMBOL(ioremap);
void __iomem *
ioremap_nocache (unsigned long offset, unsigned long size)
ioremap_nocache (unsigned long phys_addr, unsigned long size)
{
if (kern_mem_attribute(offset, size) & EFI_MEMORY_WB)
if (kern_mem_attribute(phys_addr, size) & EFI_MEMORY_WB)
return NULL;
return __ioremap(offset, size);
return __ioremap(phys_addr);
}
EXPORT_SYMBOL(ioremap_nocache);
void
iounmap (volatile void __iomem *addr)
{
if (REGION_NUMBER(addr) == RGN_GATE)
vunmap((void *) ((unsigned long) addr & PAGE_MASK));
}
EXPORT_SYMBOL(iounmap);
......@@ -659,8 +659,6 @@ pci_mmap_legacy_page_range(struct pci_bus *bus, struct vm_area_struct *vma)
return -EINVAL;
prot = phys_mem_access_prot(NULL, vma->vm_pgoff, size,
vma->vm_page_prot);
if (pgprot_val(prot) != pgprot_val(pgprot_noncached(vma->vm_page_prot)))
return -EINVAL;
addr = pci_get_legacy_mem(bus);
if (IS_ERR(addr))
......
......@@ -103,6 +103,16 @@ name:
# define FSYS_RETURN br.ret.sptk.many b6
#endif
/*
* If physical stack register size is different from DEF_NUM_STACK_REG,
* dynamically patch the kernel for correct size.
*/
.section ".data.patch.phys_stack_reg", "a"
.previous
#define LOAD_PHYS_STACK_REG_SIZE(reg) \
[1:] adds reg=IA64_NUM_PHYS_STACK_REG*8+8,r0; \
.xdata4 ".data.patch.phys_stack_reg", 1b-.
/*
* Up until early 2004, use of .align within a function caused bad unwind info.
* TEXT_ALIGN(n) expands into ".align n" if a fixed GAS is available or into nothing
......
......@@ -421,11 +421,7 @@ __writeq (unsigned long val, volatile void __iomem *addr)
extern void __iomem * ioremap(unsigned long offset, unsigned long size);
extern void __iomem * ioremap_nocache (unsigned long offset, unsigned long size);
static inline void
iounmap (volatile void __iomem *addr)
{
}
extern void iounmap (volatile void __iomem *addr);
/* Use normal IO mappings for DMI */
#define dmi_ioremap ioremap
......
......@@ -29,8 +29,7 @@
*/
#define IA64_TR_KERNEL 0 /* itr0, dtr0: maps kernel image (code & data) */
#define IA64_TR_PALCODE 1 /* itr1: maps PALcode as required by EFI */
#define IA64_TR_PERCPU_DATA 1 /* dtr1: percpu data */
#define IA64_TR_CURRENT_STACK 2 /* dtr2: maps kernel's memory- & register-stacks */
#define IA64_TR_CURRENT_STACK 1 /* dtr1: maps kernel's memory- & register-stacks */
/* Processor status register bits: */
#define IA64_PSR_BE_BIT 1
......
......@@ -89,6 +89,8 @@
#define PAL_GET_PSTATE_TYPE_AVGNORESET 2
#define PAL_GET_PSTATE_TYPE_INSTANT 3
#define PAL_MC_ERROR_INJECT 276 /* Injects processor error or returns injection capabilities */
#ifndef __ASSEMBLY__
#include <linux/types.h>
......@@ -1235,6 +1237,37 @@ ia64_pal_mc_error_info (u64 info_index, u64 type_index, u64 *size, u64 *error_in
return iprv.status;
}
/* Injects the requested processor error or returns info on
* supported injection capabilities for current processor implementation
*/
static inline s64
ia64_pal_mc_error_inject_phys (u64 err_type_info, u64 err_struct_info,
u64 err_data_buffer, u64 *capabilities, u64 *resources)
{
struct ia64_pal_retval iprv;
PAL_CALL_PHYS_STK(iprv, PAL_MC_ERROR_INJECT, err_type_info,
err_struct_info, err_data_buffer);
if (capabilities)
*capabilities= iprv.v0;
if (resources)
*resources= iprv.v1;
return iprv.status;
}
static inline s64
ia64_pal_mc_error_inject_virt (u64 err_type_info, u64 err_struct_info,
u64 err_data_buffer, u64 *capabilities, u64 *resources)
{
struct ia64_pal_retval iprv;
PAL_CALL_STK(iprv, PAL_MC_ERROR_INJECT, err_type_info,
err_struct_info, err_data_buffer);
if (capabilities)
*capabilities= iprv.v0;
if (resources)
*resources= iprv.v1;
return iprv.status;
}
/* Inform PALE_CHECK whether a machine check is expected so that PALE_CHECK willnot
* attempt to correct any expected machine checks.
*/
......
......@@ -20,6 +20,7 @@ extern void ia64_patch_imm60 (u64 insn_addr, u64 val); /* patch "brl" w/ip-rel
extern void ia64_patch_mckinley_e9 (unsigned long start, unsigned long end);
extern void ia64_patch_vtop (unsigned long start, unsigned long end);
extern void ia64_patch_phys_stack_reg(unsigned long val);
extern void ia64_patch_gate (void);
#endif /* _ASM_IA64_PATCH_H */
......@@ -19,6 +19,7 @@
#include <asm/ptrace.h>
#include <asm/ustack.h>
#define IA64_NUM_PHYS_STACK_REG 96
#define IA64_NUM_DBG_REGS 8
#define DEFAULT_MAP_BASE __IA64_UL_CONST(0x2000000000000000)
......
......@@ -11,6 +11,7 @@
extern char __per_cpu_start[], __per_cpu_end[], __phys_per_cpu_start[];
extern char __start___vtop_patchlist[], __end___vtop_patchlist[];
extern char __start___mckinley_e9_bundles[], __end___mckinley_e9_bundles[];
extern char __start___phys_stack_reg_patchlist[], __end___phys_stack_reg_patchlist[];
extern char __start_gate_section[];
extern char __start_gate_mckinley_e9_patchlist[], __end_gate_mckinley_e9_patchlist[];
extern char __start_gate_vtop_patchlist[], __end_gate_vtop_patchlist[];
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment