Commit f37bda92, authored by Bryan O'Sullivan, committed by Linus Torvalds

[PATCH] IB/ipath: memory management cleanups

Made the in-memory rcvhdrq tail update live in dma_alloc'ed memory, not random
user or special kernel memory (needed for ppc, and also "just the right thing
to do").

Some cleanups to make unexpected link transitions less likely to produce
complaints about packet errors, and also to not leave SMA packets stuck and
unable to go out.

A few other random debug and comment cleanups.

Always init rcvhdrq head/tail registers to 0, to avoid race conditions (should
have been that way some time ago).
Signed-off-by: Dave Olson <dave.olson@qlogic.com>
Signed-off-by: Bryan O'Sullivan <bryan.osullivan@qlogic.com>
Cc: "Michael S. Tsirkin" <mst@mellanox.co.il>
Cc: Roland Dreier <rolandd@cisco.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
parent 06993ca6
...@@ -311,6 +311,9 @@ struct ipath_base_info { ...@@ -311,6 +311,9 @@ struct ipath_base_info {
__u32 spi_rcv_egrchunksize; __u32 spi_rcv_egrchunksize;
/* total size of mmap to cover full rcvegrbuffers */ /* total size of mmap to cover full rcvegrbuffers */
__u32 spi_rcv_egrbuftotlen; __u32 spi_rcv_egrbuftotlen;
__u32 spi_filler_for_align;
/* address of readonly memory copy of the rcvhdrq tail register. */
__u64 spi_rcvhdr_tailaddr;
} __attribute__ ((aligned(8))); } __attribute__ ((aligned(8)));
...@@ -380,13 +383,7 @@ struct ipath_user_info { ...@@ -380,13 +383,7 @@ struct ipath_user_info {
*/ */
__u32 spu_rcvhdrsize; __u32 spu_rcvhdrsize;
/* __u64 spu_unused; /* kept for compatible layout */
* cache line aligned (64 byte) user address to
* which the rcvhdrtail register will be written by infinipath
* whenever it changes, so that no chip registers are read in
* the performance path.
*/
__u64 spu_rcvhdraddr;
/* /*
* address of struct base_info to write to * address of struct base_info to write to
......
...@@ -131,14 +131,6 @@ static struct pci_driver ipath_driver = { ...@@ -131,14 +131,6 @@ static struct pci_driver ipath_driver = {
.id_table = ipath_pci_tbl, .id_table = ipath_pci_tbl,
}; };
/*
* This is where port 0's rcvhdrtail register is written back; we also
* want nothing else sharing the cache line, so make it a cache line
* in size. Used for all units.
*/
volatile __le64 *ipath_port0_rcvhdrtail;
dma_addr_t ipath_port0_rcvhdrtail_dma;
static int port0_rcvhdrtail_refs;
static inline void read_bars(struct ipath_devdata *dd, struct pci_dev *dev, static inline void read_bars(struct ipath_devdata *dd, struct pci_dev *dev,
u32 *bar0, u32 *bar1) u32 *bar0, u32 *bar1)
...@@ -268,47 +260,6 @@ int ipath_count_units(int *npresentp, int *nupp, u32 *maxportsp) ...@@ -268,47 +260,6 @@ int ipath_count_units(int *npresentp, int *nupp, u32 *maxportsp)
return nunits; return nunits;
} }
static int init_port0_rcvhdrtail(struct pci_dev *pdev)
{
int ret;
mutex_lock(&ipath_mutex);
if (!ipath_port0_rcvhdrtail) {
ipath_port0_rcvhdrtail =
dma_alloc_coherent(&pdev->dev,
IPATH_PORT0_RCVHDRTAIL_SIZE,
&ipath_port0_rcvhdrtail_dma,
GFP_KERNEL);
if (!ipath_port0_rcvhdrtail) {
ret = -ENOMEM;
goto bail;
}
}
port0_rcvhdrtail_refs++;
ret = 0;
bail:
mutex_unlock(&ipath_mutex);
return ret;
}
/*
 * Drop one reference on the shared port 0 rcvhdrtail writeback buffer.
 * When the last reference goes away, the buffer is returned with
 * dma_free_coherent() and the global pointer is cleared.
 * Serialized by ipath_mutex.
 */
static void cleanup_port0_rcvhdrtail(struct pci_dev *pdev)
{
	mutex_lock(&ipath_mutex);

	port0_rcvhdrtail_refs--;
	if (port0_rcvhdrtail_refs == 0) {
		dma_free_coherent(&pdev->dev, IPATH_PORT0_RCVHDRTAIL_SIZE,
				  (void *) ipath_port0_rcvhdrtail,
				  ipath_port0_rcvhdrtail_dma);
		ipath_port0_rcvhdrtail = NULL;
	}

	mutex_unlock(&ipath_mutex);
}
/* /*
* These next two routines are placeholders in case we don't have per-arch * These next two routines are placeholders in case we don't have per-arch
* code for controlling write combining. If explicit control of write * code for controlling write combining. If explicit control of write
...@@ -333,20 +284,12 @@ static int __devinit ipath_init_one(struct pci_dev *pdev, ...@@ -333,20 +284,12 @@ static int __devinit ipath_init_one(struct pci_dev *pdev,
u32 bar0 = 0, bar1 = 0; u32 bar0 = 0, bar1 = 0;
u8 rev; u8 rev;
ret = init_port0_rcvhdrtail(pdev);
if (ret < 0) {
printk(KERN_ERR IPATH_DRV_NAME
": Could not allocate port0_rcvhdrtail: error %d\n",
-ret);
goto bail;
}
dd = ipath_alloc_devdata(pdev); dd = ipath_alloc_devdata(pdev);
if (IS_ERR(dd)) { if (IS_ERR(dd)) {
ret = PTR_ERR(dd); ret = PTR_ERR(dd);
printk(KERN_ERR IPATH_DRV_NAME printk(KERN_ERR IPATH_DRV_NAME
": Could not allocate devdata: error %d\n", -ret); ": Could not allocate devdata: error %d\n", -ret);
goto bail_rcvhdrtail; goto bail;
} }
ipath_cdbg(VERBOSE, "initializing unit #%u\n", dd->ipath_unit); ipath_cdbg(VERBOSE, "initializing unit #%u\n", dd->ipath_unit);
...@@ -574,9 +517,6 @@ bail_disable: ...@@ -574,9 +517,6 @@ bail_disable:
bail_devdata: bail_devdata:
ipath_free_devdata(pdev, dd); ipath_free_devdata(pdev, dd);
bail_rcvhdrtail:
cleanup_port0_rcvhdrtail(pdev);
bail: bail:
return ret; return ret;
} }
...@@ -608,7 +548,6 @@ static void __devexit ipath_remove_one(struct pci_dev *pdev) ...@@ -608,7 +548,6 @@ static void __devexit ipath_remove_one(struct pci_dev *pdev)
pci_disable_device(pdev); pci_disable_device(pdev);
ipath_free_devdata(pdev, dd); ipath_free_devdata(pdev, dd);
cleanup_port0_rcvhdrtail(pdev);
} }
/* general driver use */ /* general driver use */
...@@ -1383,26 +1322,20 @@ bail: ...@@ -1383,26 +1322,20 @@ bail:
* @dd: the infinipath device * @dd: the infinipath device
* @pd: the port data * @pd: the port data
* *
* this *must* be physically contiguous memory, and for now, * this must be contiguous memory (from an i/o perspective), and must be
* that limits it to what kmalloc can do. * DMA'able (which means for some systems, it will go through an IOMMU,
* or be forced into a low address range).
*/ */
int ipath_create_rcvhdrq(struct ipath_devdata *dd, int ipath_create_rcvhdrq(struct ipath_devdata *dd,
struct ipath_portdata *pd) struct ipath_portdata *pd)
{ {
int ret = 0, amt; int ret = 0;
amt = ALIGN(dd->ipath_rcvhdrcnt * dd->ipath_rcvhdrentsize *
sizeof(u32), PAGE_SIZE);
if (!pd->port_rcvhdrq) { if (!pd->port_rcvhdrq) {
/* dma_addr_t phys_hdrqtail;
* not using REPEAT isn't viable; at 128KB, we can easily
* fail this. The problem with REPEAT is we can block here
* "forever". There isn't an inbetween, unfortunately. We
* could reduce the risk by never freeing the rcvhdrq except
* at unload, but even then, the first time a port is used,
* we could delay for some time...
*/
gfp_t gfp_flags = GFP_USER | __GFP_COMP; gfp_t gfp_flags = GFP_USER | __GFP_COMP;
int amt = ALIGN(dd->ipath_rcvhdrcnt * dd->ipath_rcvhdrentsize *
sizeof(u32), PAGE_SIZE);
pd->port_rcvhdrq = dma_alloc_coherent( pd->port_rcvhdrq = dma_alloc_coherent(
&dd->pcidev->dev, amt, &pd->port_rcvhdrq_phys, &dd->pcidev->dev, amt, &pd->port_rcvhdrq_phys,
...@@ -1415,6 +1348,16 @@ int ipath_create_rcvhdrq(struct ipath_devdata *dd, ...@@ -1415,6 +1348,16 @@ int ipath_create_rcvhdrq(struct ipath_devdata *dd,
ret = -ENOMEM; ret = -ENOMEM;
goto bail; goto bail;
} }
pd->port_rcvhdrtail_kvaddr = dma_alloc_coherent(
&dd->pcidev->dev, PAGE_SIZE, &phys_hdrqtail, GFP_KERNEL);
if (!pd->port_rcvhdrtail_kvaddr) {
ipath_dev_err(dd, "attempt to allocate 1 page "
"for port %u rcvhdrqtailaddr failed\n",
pd->port_port);
ret = -ENOMEM;
goto bail;
}
pd->port_rcvhdrqtailaddr_phys = phys_hdrqtail;
pd->port_rcvhdrq_size = amt; pd->port_rcvhdrq_size = amt;
...@@ -1424,20 +1367,28 @@ int ipath_create_rcvhdrq(struct ipath_devdata *dd, ...@@ -1424,20 +1367,28 @@ int ipath_create_rcvhdrq(struct ipath_devdata *dd,
(unsigned long) pd->port_rcvhdrq_phys, (unsigned long) pd->port_rcvhdrq_phys,
(unsigned long) pd->port_rcvhdrq_size, (unsigned long) pd->port_rcvhdrq_size,
pd->port_port); pd->port_port);
} else {
/* ipath_cdbg(VERBOSE, "port %d hdrtailaddr, %llx physical\n",
* clear for security, sanity, and/or debugging, each pd->port_port,
* time we reuse (unsigned long long) phys_hdrqtail);
*/
memset(pd->port_rcvhdrq, 0, amt);
} }
else
ipath_cdbg(VERBOSE, "reuse port %d rcvhdrq @%p %llx phys; "
"hdrtailaddr@%p %llx physical\n",
pd->port_port, pd->port_rcvhdrq,
pd->port_rcvhdrq_phys, pd->port_rcvhdrtail_kvaddr,
(unsigned long long)pd->port_rcvhdrqtailaddr_phys);
/* clear for security and sanity on each use */
memset(pd->port_rcvhdrq, 0, pd->port_rcvhdrq_size);
memset((void *)pd->port_rcvhdrtail_kvaddr, 0, PAGE_SIZE);
/* /*
* tell chip each time we init it, even if we are re-using previous * tell chip each time we init it, even if we are re-using previous
* memory (we zero it at process close) * memory (we zero the register at process close)
*/ */
ipath_cdbg(VERBOSE, "writing port %d rcvhdraddr as %lx\n", ipath_write_kreg_port(dd, dd->ipath_kregs->kr_rcvhdrtailaddr,
pd->port_port, (unsigned long) pd->port_rcvhdrq_phys); pd->port_port, pd->port_rcvhdrqtailaddr_phys);
ipath_write_kreg_port(dd, dd->ipath_kregs->kr_rcvhdraddr, ipath_write_kreg_port(dd, dd->ipath_kregs->kr_rcvhdraddr,
pd->port_port, pd->port_rcvhdrq_phys); pd->port_port, pd->port_rcvhdrq_phys);
...@@ -1525,15 +1476,27 @@ void ipath_set_ib_lstate(struct ipath_devdata *dd, int which) ...@@ -1525,15 +1476,27 @@ void ipath_set_ib_lstate(struct ipath_devdata *dd, int which)
[INFINIPATH_IBCC_LINKCMD_ARMED] = "ARMED", [INFINIPATH_IBCC_LINKCMD_ARMED] = "ARMED",
[INFINIPATH_IBCC_LINKCMD_ACTIVE] = "ACTIVE" [INFINIPATH_IBCC_LINKCMD_ACTIVE] = "ACTIVE"
}; };
int linkcmd = (which >> INFINIPATH_IBCC_LINKCMD_SHIFT) &
INFINIPATH_IBCC_LINKCMD_MASK;
ipath_cdbg(SMA, "Trying to move unit %u to %s, current ltstate " ipath_cdbg(SMA, "Trying to move unit %u to %s, current ltstate "
"is %s\n", dd->ipath_unit, "is %s\n", dd->ipath_unit,
what[(which >> INFINIPATH_IBCC_LINKCMD_SHIFT) & what[linkcmd],
INFINIPATH_IBCC_LINKCMD_MASK],
ipath_ibcstatus_str[ ipath_ibcstatus_str[
(ipath_read_kreg64 (ipath_read_kreg64
(dd, dd->ipath_kregs->kr_ibcstatus) >> (dd, dd->ipath_kregs->kr_ibcstatus) >>
INFINIPATH_IBCS_LINKTRAININGSTATE_SHIFT) & INFINIPATH_IBCS_LINKTRAININGSTATE_SHIFT) &
INFINIPATH_IBCS_LINKTRAININGSTATE_MASK]); INFINIPATH_IBCS_LINKTRAININGSTATE_MASK]);
/* flush all queued sends when going to DOWN or INIT, to be sure that
* they don't block SMA and other MAD packets */
if (!linkcmd || linkcmd == INFINIPATH_IBCC_LINKCMD_INIT) {
ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl,
INFINIPATH_S_ABORT);
ipath_disarm_piobufs(dd, dd->ipath_lastport_piobuf,
(unsigned)(dd->ipath_piobcnt2k +
dd->ipath_piobcnt4k) -
dd->ipath_lastport_piobuf);
}
ipath_write_kreg(dd, dd->ipath_kregs->kr_ibcctrl, ipath_write_kreg(dd, dd->ipath_kregs->kr_ibcctrl,
dd->ipath_ibcctrl | which); dd->ipath_ibcctrl | which);
...@@ -1681,60 +1644,54 @@ void ipath_shutdown_device(struct ipath_devdata *dd) ...@@ -1681,60 +1644,54 @@ void ipath_shutdown_device(struct ipath_devdata *dd)
/** /**
* ipath_free_pddata - free a port's allocated data * ipath_free_pddata - free a port's allocated data
* @dd: the infinipath device * @dd: the infinipath device
* @port: the port * @pd: the portdata structure
* @freehdrq: free the port data structure if true
* *
* when closing, free up any allocated data for a port, if the * free up any allocated data for a port
* reference count goes to zero * This should not touch anything that would affect a simultaneous
* Note: this also optionally frees the portdata itself! * re-allocation of port data, because it is called after ipath_mutex
* Any changes here have to be matched up with the reinit case * is released (and can be called from reinit as well).
* of ipath_init_chip(), which calls this routine on reinit after reset. * It should never change any chip state, or global driver state.
* (The only exception to global state is freeing the port0 port0_skbs.)
*/ */
void ipath_free_pddata(struct ipath_devdata *dd, u32 port, int freehdrq) void ipath_free_pddata(struct ipath_devdata *dd, struct ipath_portdata *pd)
{ {
struct ipath_portdata *pd = dd->ipath_pd[port];
if (!pd) if (!pd)
return; return;
if (freehdrq)
/* if (pd->port_rcvhdrq) {
* only clear and free portdata if we are going to also
* release the hdrq, otherwise we leak the hdrq on each
* open/close cycle
*/
dd->ipath_pd[port] = NULL;
if (freehdrq && pd->port_rcvhdrq) {
ipath_cdbg(VERBOSE, "free closed port %d rcvhdrq @ %p " ipath_cdbg(VERBOSE, "free closed port %d rcvhdrq @ %p "
"(size=%lu)\n", pd->port_port, pd->port_rcvhdrq, "(size=%lu)\n", pd->port_port, pd->port_rcvhdrq,
(unsigned long) pd->port_rcvhdrq_size); (unsigned long) pd->port_rcvhdrq_size);
dma_free_coherent(&dd->pcidev->dev, pd->port_rcvhdrq_size, dma_free_coherent(&dd->pcidev->dev, pd->port_rcvhdrq_size,
pd->port_rcvhdrq, pd->port_rcvhdrq_phys); pd->port_rcvhdrq, pd->port_rcvhdrq_phys);
pd->port_rcvhdrq = NULL; pd->port_rcvhdrq = NULL;
if (pd->port_rcvhdrtail_kvaddr) {
dma_free_coherent(&dd->pcidev->dev, PAGE_SIZE,
(void *)pd->port_rcvhdrtail_kvaddr,
pd->port_rcvhdrqtailaddr_phys);
pd->port_rcvhdrtail_kvaddr = NULL;
}
} }
if (port && pd->port_rcvegrbuf) { if (pd->port_port && pd->port_rcvegrbuf) {
/* always free this */ unsigned e;
if (pd->port_rcvegrbuf) {
unsigned e; for (e = 0; e < pd->port_rcvegrbuf_chunks; e++) {
void *base = pd->port_rcvegrbuf[e];
for (e = 0; e < pd->port_rcvegrbuf_chunks; e++) { size_t size = pd->port_rcvegrbuf_size;
void *base = pd->port_rcvegrbuf[e];
size_t size = pd->port_rcvegrbuf_size; ipath_cdbg(VERBOSE, "egrbuf free(%p, %lu), "
"chunk %u/%u\n", base,
ipath_cdbg(VERBOSE, "egrbuf free(%p, %lu), " (unsigned long) size,
"chunk %u/%u\n", base, e, pd->port_rcvegrbuf_chunks);
(unsigned long) size, dma_free_coherent(&dd->pcidev->dev, size,
e, pd->port_rcvegrbuf_chunks); base, pd->port_rcvegrbuf_phys[e]);
dma_free_coherent(
&dd->pcidev->dev, size, base,
pd->port_rcvegrbuf_phys[e]);
}
vfree(pd->port_rcvegrbuf);
pd->port_rcvegrbuf = NULL;
vfree(pd->port_rcvegrbuf_phys);
pd->port_rcvegrbuf_phys = NULL;
} }
vfree(pd->port_rcvegrbuf);
pd->port_rcvegrbuf = NULL;
vfree(pd->port_rcvegrbuf_phys);
pd->port_rcvegrbuf_phys = NULL;
pd->port_rcvegrbuf_chunks = 0; pd->port_rcvegrbuf_chunks = 0;
} else if (port == 0 && dd->ipath_port0_skbs) { } else if (pd->port_port == 0 && dd->ipath_port0_skbs) {
unsigned e; unsigned e;
struct sk_buff **skbs = dd->ipath_port0_skbs; struct sk_buff **skbs = dd->ipath_port0_skbs;
...@@ -1746,10 +1703,8 @@ void ipath_free_pddata(struct ipath_devdata *dd, u32 port, int freehdrq) ...@@ -1746,10 +1703,8 @@ void ipath_free_pddata(struct ipath_devdata *dd, u32 port, int freehdrq)
dev_kfree_skb(skbs[e]); dev_kfree_skb(skbs[e]);
vfree(skbs); vfree(skbs);
} }
if (freehdrq) { kfree(pd->port_tid_pg_list);
kfree(pd->port_tid_pg_list); kfree(pd);
kfree(pd);
}
} }
static int __init infinipath_init(void) static int __init infinipath_init(void)
...@@ -1874,10 +1829,14 @@ static void cleanup_device(struct ipath_devdata *dd) ...@@ -1874,10 +1829,14 @@ static void cleanup_device(struct ipath_devdata *dd)
/* /*
* free any resources still in use (usually just kernel ports) * free any resources still in use (usually just kernel ports)
* at unload * at unload; we do for portcnt, not cfgports, because cfgports
* could have changed while we were loaded.
*/ */
for (port = 0; port < dd->ipath_cfgports; port++) for (port = 0; port < dd->ipath_portcnt; port++) {
ipath_free_pddata(dd, port, 1); struct ipath_portdata *pd = dd->ipath_pd[port];
dd->ipath_pd[port] = NULL;
ipath_free_pddata(dd, pd);
}
kfree(dd->ipath_pd); kfree(dd->ipath_pd);
/* /*
* debuggability, in case some cleanup path tries to use it * debuggability, in case some cleanup path tries to use it
......
...@@ -123,6 +123,7 @@ static int ipath_get_base_info(struct ipath_portdata *pd, ...@@ -123,6 +123,7 @@ static int ipath_get_base_info(struct ipath_portdata *pd,
* on to yet another method of dealing with this * on to yet another method of dealing with this
*/ */
kinfo->spi_rcvhdr_base = (u64) pd->port_rcvhdrq_phys; kinfo->spi_rcvhdr_base = (u64) pd->port_rcvhdrq_phys;
kinfo->spi_rcvhdr_tailaddr = (u64)pd->port_rcvhdrqtailaddr_phys;
kinfo->spi_rcv_egrbufs = (u64) pd->port_rcvegr_phys; kinfo->spi_rcv_egrbufs = (u64) pd->port_rcvegr_phys;
kinfo->spi_pioavailaddr = (u64) dd->ipath_pioavailregs_phys; kinfo->spi_pioavailaddr = (u64) dd->ipath_pioavailregs_phys;
kinfo->spi_status = (u64) kinfo->spi_pioavailaddr + kinfo->spi_status = (u64) kinfo->spi_pioavailaddr +
...@@ -785,11 +786,12 @@ static int ipath_create_user_egr(struct ipath_portdata *pd) ...@@ -785,11 +786,12 @@ static int ipath_create_user_egr(struct ipath_portdata *pd)
bail_rcvegrbuf_phys: bail_rcvegrbuf_phys:
for (e = 0; e < pd->port_rcvegrbuf_chunks && for (e = 0; e < pd->port_rcvegrbuf_chunks &&
pd->port_rcvegrbuf[e]; e++) pd->port_rcvegrbuf[e]; e++) {
dma_free_coherent(&dd->pcidev->dev, size, dma_free_coherent(&dd->pcidev->dev, size,
pd->port_rcvegrbuf[e], pd->port_rcvegrbuf[e],
pd->port_rcvegrbuf_phys[e]); pd->port_rcvegrbuf_phys[e]);
}
vfree(pd->port_rcvegrbuf_phys); vfree(pd->port_rcvegrbuf_phys);
pd->port_rcvegrbuf_phys = NULL; pd->port_rcvegrbuf_phys = NULL;
bail_rcvegrbuf: bail_rcvegrbuf:
...@@ -804,10 +806,7 @@ static int ipath_do_user_init(struct ipath_portdata *pd, ...@@ -804,10 +806,7 @@ static int ipath_do_user_init(struct ipath_portdata *pd,
{ {
int ret = 0; int ret = 0;
struct ipath_devdata *dd = pd->port_dd; struct ipath_devdata *dd = pd->port_dd;
u64 physaddr, uaddr, off, atmp;
struct page *pagep;
u32 head32; u32 head32;
u64 head;
/* for now, if major version is different, bail */ /* for now, if major version is different, bail */
if ((uinfo->spu_userversion >> 16) != IPATH_USER_SWMAJOR) { if ((uinfo->spu_userversion >> 16) != IPATH_USER_SWMAJOR) {
...@@ -832,54 +831,6 @@ static int ipath_do_user_init(struct ipath_portdata *pd, ...@@ -832,54 +831,6 @@ static int ipath_do_user_init(struct ipath_portdata *pd,
/* for now we do nothing with rcvhdrcnt: uinfo->spu_rcvhdrcnt */ /* for now we do nothing with rcvhdrcnt: uinfo->spu_rcvhdrcnt */
/* set up for the rcvhdr Q tail register writeback to user memory */
if (!uinfo->spu_rcvhdraddr ||
!access_ok(VERIFY_WRITE, (u64 __user *) (unsigned long)
uinfo->spu_rcvhdraddr, sizeof(u64))) {
ipath_dbg("Port %d rcvhdrtail addr %llx not valid\n",
pd->port_port,
(unsigned long long) uinfo->spu_rcvhdraddr);
ret = -EINVAL;
goto done;
}
off = offset_in_page(uinfo->spu_rcvhdraddr);
uaddr = PAGE_MASK & (unsigned long) uinfo->spu_rcvhdraddr;
ret = ipath_get_user_pages_nocopy(uaddr, &pagep);
if (ret) {
dev_info(&dd->pcidev->dev, "Failed to lookup and lock "
"address %llx for rcvhdrtail: errno %d\n",
(unsigned long long) uinfo->spu_rcvhdraddr, -ret);
goto done;
}
ipath_stats.sps_pagelocks++;
pd->port_rcvhdrtail_uaddr = uaddr;
pd->port_rcvhdrtail_pagep = pagep;
pd->port_rcvhdrtail_kvaddr =
page_address(pagep);
pd->port_rcvhdrtail_kvaddr += off;
physaddr = page_to_phys(pagep) + off;
ipath_cdbg(VERBOSE, "port %d user addr %llx hdrtailaddr, %llx "
"physical (off=%llx)\n",
pd->port_port,
(unsigned long long) uinfo->spu_rcvhdraddr,
(unsigned long long) physaddr, (unsigned long long) off);
ipath_write_kreg_port(dd, dd->ipath_kregs->kr_rcvhdrtailaddr,
pd->port_port, physaddr);
atmp = ipath_read_kreg64_port(dd,
dd->ipath_kregs->kr_rcvhdrtailaddr,
pd->port_port);
if (physaddr != atmp) {
ipath_dev_err(dd,
"Catastrophic software error, "
"RcvHdrTailAddr%u written as %llx, "
"read back as %llx\n", pd->port_port,
(unsigned long long) physaddr,
(unsigned long long) atmp);
ret = -EINVAL;
goto done;
}
/* for right now, kernel piobufs are at end, so port 1 is at 0 */ /* for right now, kernel piobufs are at end, so port 1 is at 0 */
pd->port_piobufs = dd->ipath_piobufbase + pd->port_piobufs = dd->ipath_piobufbase +
dd->ipath_pbufsport * (pd->port_port - dd->ipath_pbufsport * (pd->port_port -
...@@ -898,26 +849,18 @@ static int ipath_do_user_init(struct ipath_portdata *pd, ...@@ -898,26 +849,18 @@ static int ipath_do_user_init(struct ipath_portdata *pd,
ret = ipath_create_user_egr(pd); ret = ipath_create_user_egr(pd);
if (ret) if (ret)
goto done; goto done;
/* enable receives now */
/* atomically set enable bit for this port */
set_bit(INFINIPATH_R_PORTENABLE_SHIFT + pd->port_port,
&dd->ipath_rcvctrl);
/* /*
* set the head registers for this port to the current values * set the eager head register for this port to the current values
* of the tail pointers, since we don't know if they were * of the tail pointers, since we don't know if they were
* updated on last use of the port. * updated on last use of the port.
*/ */
head32 = ipath_read_ureg32(dd, ur_rcvhdrtail, pd->port_port);
head = (u64) head32;
ipath_write_ureg(dd, ur_rcvhdrhead, head, pd->port_port);
head32 = ipath_read_ureg32(dd, ur_rcvegrindextail, pd->port_port); head32 = ipath_read_ureg32(dd, ur_rcvegrindextail, pd->port_port);
ipath_write_ureg(dd, ur_rcvegrindexhead, head32, pd->port_port); ipath_write_ureg(dd, ur_rcvegrindexhead, head32, pd->port_port);
dd->ipath_lastegrheads[pd->port_port] = -1; dd->ipath_lastegrheads[pd->port_port] = -1;
dd->ipath_lastrcvhdrqtails[pd->port_port] = -1; dd->ipath_lastrcvhdrqtails[pd->port_port] = -1;
ipath_cdbg(VERBOSE, "Wrote port%d head %llx, egrhead %x from " ipath_cdbg(VERBOSE, "Wrote port%d egrhead %x from tail regs\n",
"tail regs\n", pd->port_port, pd->port_port, head32);
(unsigned long long) head, head32);
pd->port_tidcursor = 0; /* start at beginning after open */ pd->port_tidcursor = 0; /* start at beginning after open */
/* /*
* now enable the port; the tail registers will be written to memory * now enable the port; the tail registers will be written to memory
...@@ -926,24 +869,76 @@ static int ipath_do_user_init(struct ipath_portdata *pd, ...@@ -926,24 +869,76 @@ static int ipath_do_user_init(struct ipath_portdata *pd,
* transition from 0 to 1, so clear it first, then set it as part of * transition from 0 to 1, so clear it first, then set it as part of
* enabling the port. This will (very briefly) affect any other * enabling the port. This will (very briefly) affect any other
* open ports, but it shouldn't be long enough to be an issue. * open ports, but it shouldn't be long enough to be an issue.
* We explicitly set the in-memory copy to 0 beforehand, so we don't
* have to wait to be sure the DMA update has happened.
*/ */
*pd->port_rcvhdrtail_kvaddr = 0ULL;
set_bit(INFINIPATH_R_PORTENABLE_SHIFT + pd->port_port,
&dd->ipath_rcvctrl);
ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvctrl, ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvctrl,
dd->ipath_rcvctrl & ~INFINIPATH_R_TAILUPD); dd->ipath_rcvctrl & ~INFINIPATH_R_TAILUPD);
ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvctrl, ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvctrl,
dd->ipath_rcvctrl); dd->ipath_rcvctrl);
done: done:
return ret; return ret;
} }
/*
 * ipath_mmap_mem - common code for the mappings on dma_alloc_coherent mem
 * @vma: the user mapping being set up
 * @pd: the port the memory belongs to (supplies the device and port number)
 * @len: size in bytes of the underlying buffer; a request may not exceed it
 * @write_ok: non-zero if the user is allowed a writable mapping
 * @addr: DMA address of the buffer; its page frame number is what gets mapped
 * @what: short description of the buffer, used only in log messages
 *
 * Returns 0 on success, -EFAULT if the requested length is larger than
 * @len, -EPERM if a writable mapping was requested but @write_ok is 0,
 * or whatever error remap_pfn_range() reports.
 *
 * NOTE(review): the DMA address is used directly as a physical address.
 * That presumably only holds where DMA and physical addresses coincide
 * (no IOMMU translation) -- confirm for the target platforms.
 */
static int ipath_mmap_mem(struct vm_area_struct *vma,
	struct ipath_portdata *pd, unsigned len,
	int write_ok, dma_addr_t addr, char *what)
{
	struct ipath_devdata *dd = pd->port_dd;
	unsigned long reqlen = vma->vm_end - vma->vm_start;
	/*
	 * Shift the full-width DMA address before narrowing, and keep the
	 * result in the type remap_pfn_range() takes, so no address bits
	 * are dropped.
	 */
	unsigned long pfn = addr >> PAGE_SHIFT;
	int ret;

	if (reqlen > len) {
		dev_info(&dd->pcidev->dev,
			 "FAIL on %s: len %lx > %x\n", what,
			 reqlen, len);
		ret = -EFAULT;
		goto bail;
	}

	if (!write_ok) {
		if (vma->vm_flags & VM_WRITE) {
			dev_info(&dd->pcidev->dev,
				 "%s must be mapped readonly\n", what);
			ret = -EPERM;
			goto bail;
		}

		/* don't allow them to later change with mprotect */
		vma->vm_flags &= ~VM_MAYWRITE;
	}

	/*
	 * Map only what the caller asked for.  The request may be shorter
	 * than the buffer, and mapping the full buffer length would install
	 * pages beyond vma->vm_end.
	 */
	ret = remap_pfn_range(vma, vma->vm_start, pfn,
			      reqlen, vma->vm_page_prot);
	if (ret)
		dev_info(&dd->pcidev->dev,
			 "%s port%u mmap of %lx, %x bytes r%c failed: %d\n",
			 what, pd->port_port, (unsigned long)addr,
			 (unsigned)reqlen, write_ok?'w':'o', ret);
	else
		ipath_cdbg(VERBOSE, "%s port%u mmaped %lx, %x bytes r%c\n",
			   what, pd->port_port, (unsigned long)addr,
			   (unsigned)reqlen, write_ok?'w':'o');
bail:
	return ret;
}
static int mmap_ureg(struct vm_area_struct *vma, struct ipath_devdata *dd, static int mmap_ureg(struct vm_area_struct *vma, struct ipath_devdata *dd,
u64 ureg) u64 ureg)
{ {
unsigned long phys; unsigned long phys;
int ret; int ret;
/* it's the real hardware, so io_remap works */ /*
* This is real hardware, so use io_remap. This is the mechanism
* for the user process to update the head registers for their port
* in the chip.
*/
if ((vma->vm_end - vma->vm_start) > PAGE_SIZE) { if ((vma->vm_end - vma->vm_start) > PAGE_SIZE) {
dev_info(&dd->pcidev->dev, "FAIL mmap userreg: reqlen " dev_info(&dd->pcidev->dev, "FAIL mmap userreg: reqlen "
"%lx > PAGE\n", vma->vm_end - vma->vm_start); "%lx > PAGE\n", vma->vm_end - vma->vm_start);
...@@ -969,10 +964,11 @@ static int mmap_piobufs(struct vm_area_struct *vma, ...@@ -969,10 +964,11 @@ static int mmap_piobufs(struct vm_area_struct *vma,
int ret; int ret;
/* /*
* When we map the PIO buffers, we want to map them as writeonly, no * When we map the PIO buffers in the chip, we want to map them as
* read possible. * writeonly, no read possible. This prevents access to previous
* process data, and catches users who might try to read the i/o
* space due to a bug.
*/ */
if ((vma->vm_end - vma->vm_start) > if ((vma->vm_end - vma->vm_start) >
(dd->ipath_pbufsport * dd->ipath_palign)) { (dd->ipath_pbufsport * dd->ipath_palign)) {
dev_info(&dd->pcidev->dev, "FAIL mmap piobufs: " dev_info(&dd->pcidev->dev, "FAIL mmap piobufs: "
...@@ -983,11 +979,10 @@ static int mmap_piobufs(struct vm_area_struct *vma, ...@@ -983,11 +979,10 @@ static int mmap_piobufs(struct vm_area_struct *vma,
} }
phys = dd->ipath_physaddr + pd->port_piobufs; phys = dd->ipath_physaddr + pd->port_piobufs;
/* /*
* Do *NOT* mark this as non-cached (PWT bit), or we don't get the * Don't mark this as non-cached, or we don't get the
* write combining behavior we want on the PIO buffers! * write combining behavior we want on the PIO buffers!
* vma->vm_page_prot =
* pgprot_noncached(vma->vm_page_prot);
*/ */
if (vma->vm_flags & VM_READ) { if (vma->vm_flags & VM_READ) {
...@@ -999,8 +994,7 @@ static int mmap_piobufs(struct vm_area_struct *vma, ...@@ -999,8 +994,7 @@ static int mmap_piobufs(struct vm_area_struct *vma,
} }
/* don't allow them to later change to readable with mprotect */ /* don't allow them to later change to readable with mprotect */
vma->vm_flags &= ~VM_MAYREAD;
vma->vm_flags &= ~VM_MAYWRITE;
vma->vm_flags |= VM_DONTCOPY | VM_DONTEXPAND; vma->vm_flags |= VM_DONTCOPY | VM_DONTEXPAND;
ret = io_remap_pfn_range(vma, vma->vm_start, phys >> PAGE_SHIFT, ret = io_remap_pfn_range(vma, vma->vm_start, phys >> PAGE_SHIFT,
...@@ -1019,11 +1013,6 @@ static int mmap_rcvegrbufs(struct vm_area_struct *vma, ...@@ -1019,11 +1013,6 @@ static int mmap_rcvegrbufs(struct vm_area_struct *vma,
dma_addr_t *phys; dma_addr_t *phys;
int ret; int ret;
if (!pd->port_rcvegrbuf) {
ret = -EFAULT;
goto bail;
}
size = pd->port_rcvegrbuf_size; size = pd->port_rcvegrbuf_size;
total_size = pd->port_rcvegrbuf_chunks * size; total_size = pd->port_rcvegrbuf_chunks * size;
if ((vma->vm_end - vma->vm_start) > total_size) { if ((vma->vm_end - vma->vm_start) > total_size) {
...@@ -1041,13 +1030,12 @@ static int mmap_rcvegrbufs(struct vm_area_struct *vma, ...@@ -1041,13 +1030,12 @@ static int mmap_rcvegrbufs(struct vm_area_struct *vma,
ret = -EPERM; ret = -EPERM;
goto bail; goto bail;
} }
/* don't allow them to later change to writeable with mprotect */
vma->vm_flags &= ~VM_MAYWRITE;
start = vma->vm_start; start = vma->vm_start;
phys = pd->port_rcvegrbuf_phys; phys = pd->port_rcvegrbuf_phys;
/* don't allow them to later change to writeable with mprotect */
vma->vm_flags &= ~VM_MAYWRITE;
for (i = 0; i < pd->port_rcvegrbuf_chunks; i++, start += size) { for (i = 0; i < pd->port_rcvegrbuf_chunks; i++, start += size) {
ret = remap_pfn_range(vma, start, phys[i] >> PAGE_SHIFT, ret = remap_pfn_range(vma, start, phys[i] >> PAGE_SHIFT,
size, vma->vm_page_prot); size, vma->vm_page_prot);
...@@ -1060,78 +1048,6 @@ bail: ...@@ -1060,78 +1048,6 @@ bail:
return ret; return ret;
} }
/*
 * Map a port's receive header queue (the memory advertised to user space
 * as spi_rcvhdr_base) into the calling process.
 *
 * The queue is physically contiguous.  The mapping is allowed to be
 * read-write so that user code can write hdrq entries, letting protocol
 * code poll directly for whether an entry has been written.
 *
 * Returns -EFAULT if the request is longer than the page-aligned queue
 * size, otherwise the result of remap_pfn_range().
 */
static int mmap_rcvhdrq(struct vm_area_struct *vma,
			struct ipath_portdata *pd)
{
	struct ipath_devdata *dd = pd->port_dd;
	unsigned long reqlen = vma->vm_end - vma->vm_start;
	size_t qsize = ALIGN(dd->ipath_rcvhdrcnt * dd->ipath_rcvhdrentsize *
			     sizeof(u32), PAGE_SIZE);

	if (reqlen > qsize) {
		dev_info(&dd->pcidev->dev,
			 "FAIL on rcvhdrq: reqlen %lx > actual %lx\n",
			 reqlen, (unsigned long) qsize);
		return -EFAULT;
	}

	return remap_pfn_range(vma, vma->vm_start,
			       pd->port_rcvhdrq_phys >> PAGE_SHIFT,
			       reqlen, vma->vm_page_prot);
}
/*
 * Map the in-memory copy of the PIO buffer-available registers into the
 * calling process.
 *
 * The copy is a single physically contiguous page and must be mapped
 * read-only: a writable request is refused, and VM_MAYWRITE is cleared
 * so the mapping cannot be made writable afterwards.
 *
 * Returns -EFAULT if more than one page is requested, -EPERM if a
 * writable mapping is requested, otherwise the result of
 * remap_pfn_range().
 */
static int mmap_pioavailregs(struct vm_area_struct *vma,
			     struct ipath_portdata *pd)
{
	struct ipath_devdata *dd = pd->port_dd;
	unsigned long reqlen = vma->vm_end - vma->vm_start;

	if (reqlen > PAGE_SIZE) {
		dev_info(&dd->pcidev->dev, "FAIL on pioavailregs_dma: "
			 "reqlen %lx > actual %lx\n",
			 reqlen, (unsigned long) PAGE_SIZE);
		return -EFAULT;
	}

	if (vma->vm_flags & VM_WRITE) {
		dev_info(&dd->pcidev->dev,
			 "Can't map pioavailregs as writable (flags=%lx)\n",
			 vma->vm_flags);
		return -EPERM;
	}

	/* don't allow them to later change with mprotect */
	vma->vm_flags &= ~VM_MAYWRITE;

	return remap_pfn_range(vma, vma->vm_start,
			       dd->ipath_pioavailregs_phys >> PAGE_SHIFT,
			       PAGE_SIZE, vma->vm_page_prot);
}
/** /**
* ipath_mmap - mmap various structures into user space * ipath_mmap - mmap various structures into user space
* @fp: the file pointer * @fp: the file pointer
...@@ -1151,6 +1067,7 @@ static int ipath_mmap(struct file *fp, struct vm_area_struct *vma) ...@@ -1151,6 +1067,7 @@ static int ipath_mmap(struct file *fp, struct vm_area_struct *vma)
pd = port_fp(fp); pd = port_fp(fp);
dd = pd->port_dd; dd = pd->port_dd;
/* /*
* This is the ipath_do_user_init() code, mapping the shared buffers * This is the ipath_do_user_init() code, mapping the shared buffers
* into the user process. The address referred to by vm_pgoff is the * into the user process. The address referred to by vm_pgoff is the
...@@ -1160,29 +1077,59 @@ static int ipath_mmap(struct file *fp, struct vm_area_struct *vma) ...@@ -1160,29 +1077,59 @@ static int ipath_mmap(struct file *fp, struct vm_area_struct *vma)
pgaddr = vma->vm_pgoff << PAGE_SHIFT; pgaddr = vma->vm_pgoff << PAGE_SHIFT;
/* /*
* note that ureg does *NOT* have the kregvirt as part of it, to be * Must fit in 40 bits for our hardware; some checked elsewhere,
* sure that for 32 bit programs, we don't end up trying to map a > * but we'll be paranoid. Check for 0 is mostly in case one of the
* 44 address. Has to match ipath_get_base_info() code that sets * allocations failed, but user called mmap anyway. We want to catch
* __spi_uregbase * that before it can match.
*/ */
if (!pgaddr || pgaddr >= (1ULL<<40)) {
ipath_dev_err(dd, "Bad phys addr %llx, start %lx, end %lx\n",
(unsigned long long)pgaddr, vma->vm_start, vma->vm_end);
return -EINVAL;
}
/* just the offset of the port user registers, not physical addr */
ureg = dd->ipath_uregbase + dd->ipath_palign * pd->port_port; ureg = dd->ipath_uregbase + dd->ipath_palign * pd->port_port;
ipath_cdbg(MM, "pgaddr %llx vm_start=%lx len %lx port %u:%u\n", ipath_cdbg(MM, "ushare: pgaddr %llx vm_start=%lx, vmlen %lx\n",
(unsigned long long) pgaddr, vma->vm_start, (unsigned long long) pgaddr, vma->vm_start,
vma->vm_end - vma->vm_start, dd->ipath_unit, vma->vm_end - vma->vm_start);
pd->port_port);
if (pgaddr == ureg) if (vma->vm_start & (PAGE_SIZE-1)) {
ipath_dev_err(dd,
"vm_start not aligned: %lx, end=%lx phys %lx\n",
vma->vm_start, vma->vm_end, (unsigned long)pgaddr);
ret = -EINVAL;
}
else if (pgaddr == ureg)
ret = mmap_ureg(vma, dd, ureg); ret = mmap_ureg(vma, dd, ureg);
else if (pgaddr == pd->port_piobufs) else if (pgaddr == pd->port_piobufs)
ret = mmap_piobufs(vma, dd, pd); ret = mmap_piobufs(vma, dd, pd);
else if (pgaddr == (u64) pd->port_rcvegr_phys) else if (pgaddr == (u64) pd->port_rcvegr_phys)
ret = mmap_rcvegrbufs(vma, pd); ret = mmap_rcvegrbufs(vma, pd);
else if (pgaddr == (u64) pd->port_rcvhdrq_phys) else if (pgaddr == (u64) pd->port_rcvhdrq_phys) {
ret = mmap_rcvhdrq(vma, pd); /*
* The rcvhdrq itself; readonly except on HT-400 (so have
* to allow writable mapping), multiple pages, contiguous
* from an i/o perspective.
*/
unsigned total_size =
ALIGN(dd->ipath_rcvhdrcnt * dd->ipath_rcvhdrentsize
* sizeof(u32), PAGE_SIZE);
ret = ipath_mmap_mem(vma, pd, total_size, 1,
pd->port_rcvhdrq_phys,
"rcvhdrq");
}
else if (pgaddr == (u64)pd->port_rcvhdrqtailaddr_phys)
/* in-memory copy of rcvhdrq tail register */
ret = ipath_mmap_mem(vma, pd, PAGE_SIZE, 0,
pd->port_rcvhdrqtailaddr_phys,
"rcvhdrq tail");
else if (pgaddr == dd->ipath_pioavailregs_phys) else if (pgaddr == dd->ipath_pioavailregs_phys)
ret = mmap_pioavailregs(vma, pd); /* in-memory copy of pioavail registers */
ret = ipath_mmap_mem(vma, pd, PAGE_SIZE, 0,
dd->ipath_pioavailregs_phys,
"pioavail registers");
else else
ret = -EINVAL; ret = -EINVAL;
...@@ -1539,14 +1486,6 @@ static int ipath_close(struct inode *in, struct file *fp) ...@@ -1539,14 +1486,6 @@ static int ipath_close(struct inode *in, struct file *fp)
} }
if (dd->ipath_kregbase) { if (dd->ipath_kregbase) {
if (pd->port_rcvhdrtail_uaddr) {
pd->port_rcvhdrtail_uaddr = 0;
pd->port_rcvhdrtail_kvaddr = NULL;
ipath_release_user_pages_on_close(
&pd->port_rcvhdrtail_pagep, 1);
pd->port_rcvhdrtail_pagep = NULL;
ipath_stats.sps_pageunlocks++;
}
ipath_write_kreg_port( ipath_write_kreg_port(
dd, dd->ipath_kregs->kr_rcvhdrtailaddr, dd, dd->ipath_kregs->kr_rcvhdrtailaddr,
port, 0ULL); port, 0ULL);
...@@ -1583,9 +1522,9 @@ static int ipath_close(struct inode *in, struct file *fp) ...@@ -1583,9 +1522,9 @@ static int ipath_close(struct inode *in, struct file *fp)
dd->ipath_f_clear_tids(dd, pd->port_port); dd->ipath_f_clear_tids(dd, pd->port_port);
ipath_free_pddata(dd, pd->port_port, 0); dd->ipath_pd[pd->port_port] = NULL; /* before releasing mutex */
mutex_unlock(&ipath_mutex); mutex_unlock(&ipath_mutex);
ipath_free_pddata(dd, pd); /* after releasing the mutex */
return ret; return ret;
} }
...@@ -1905,3 +1844,4 @@ void ipath_user_remove(struct ipath_devdata *dd) ...@@ -1905,3 +1844,4 @@ void ipath_user_remove(struct ipath_devdata *dd)
bail: bail:
return; return;
} }
...@@ -411,17 +411,8 @@ static int init_pioavailregs(struct ipath_devdata *dd) ...@@ -411,17 +411,8 @@ static int init_pioavailregs(struct ipath_devdata *dd)
/* and its length */ /* and its length */
dd->ipath_freezelen = L1_CACHE_BYTES - sizeof(dd->ipath_statusp[0]); dd->ipath_freezelen = L1_CACHE_BYTES - sizeof(dd->ipath_statusp[0]);
if (dd->ipath_unit * 64 > (IPATH_PORT0_RCVHDRTAIL_SIZE - 64)) { ret = 0;
ipath_dev_err(dd, "unit %u too large for port 0 "
"rcvhdrtail buffer size\n", dd->ipath_unit);
ret = -ENODEV;
}
else
ret = 0;
/* so we can get current tail in ipath_kreceive(), per chip */
dd->ipath_hdrqtailptr = &ipath_port0_rcvhdrtail[
dd->ipath_unit * (64 / sizeof(*ipath_port0_rcvhdrtail))];
done: done:
return ret; return ret;
} }
...@@ -654,7 +645,7 @@ int ipath_init_chip(struct ipath_devdata *dd, int reinit) ...@@ -654,7 +645,7 @@ int ipath_init_chip(struct ipath_devdata *dd, int reinit)
{ {
int ret = 0, i; int ret = 0, i;
u32 val32, kpiobufs; u32 val32, kpiobufs;
u64 val, atmp; u64 val;
struct ipath_portdata *pd = NULL; /* keep gcc4 happy */ struct ipath_portdata *pd = NULL; /* keep gcc4 happy */
ret = init_housekeeping(dd, &pd, reinit); ret = init_housekeeping(dd, &pd, reinit);
...@@ -777,24 +768,6 @@ int ipath_init_chip(struct ipath_devdata *dd, int reinit) ...@@ -777,24 +768,6 @@ int ipath_init_chip(struct ipath_devdata *dd, int reinit)
goto done; goto done;
} }
val = ipath_port0_rcvhdrtail_dma + dd->ipath_unit * 64;
/* verify that the alignment requirement was met */
ipath_write_kreg_port(dd, dd->ipath_kregs->kr_rcvhdrtailaddr,
0, val);
atmp = ipath_read_kreg64_port(
dd, dd->ipath_kregs->kr_rcvhdrtailaddr, 0);
if (val != atmp) {
ipath_dev_err(dd, "Catastrophic software error, "
"RcvHdrTailAddr0 written as %llx, "
"read back as %llx from %x\n",
(unsigned long long) val,
(unsigned long long) atmp,
dd->ipath_kregs->kr_rcvhdrtailaddr);
ret = -EINVAL;
goto done;
}
ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvbthqp, IPATH_KD_QP); ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvbthqp, IPATH_KD_QP);
/* /*
...@@ -845,12 +818,18 @@ int ipath_init_chip(struct ipath_devdata *dd, int reinit) ...@@ -845,12 +818,18 @@ int ipath_init_chip(struct ipath_devdata *dd, int reinit)
* re-init, the simplest way to handle this is to free * re-init, the simplest way to handle this is to free
* existing, and re-allocate. * existing, and re-allocate.
*/ */
if (reinit) if (reinit) {
ipath_free_pddata(dd, 0, 0); struct ipath_portdata *pd = dd->ipath_pd[0];
dd->ipath_pd[0] = NULL;
ipath_free_pddata(dd, pd);
}
dd->ipath_f_tidtemplate(dd); dd->ipath_f_tidtemplate(dd);
ret = ipath_create_rcvhdrq(dd, pd); ret = ipath_create_rcvhdrq(dd, pd);
if (!ret) if (!ret) {
dd->ipath_hdrqtailptr =
(volatile __le64 *)pd->port_rcvhdrtail_kvaddr;
ret = create_port0_egr(dd); ret = create_port0_egr(dd);
}
if (ret) if (ret)
ipath_dev_err(dd, "failed to allocate port 0 (kernel) " ipath_dev_err(dd, "failed to allocate port 0 (kernel) "
"rcvhdrq and/or egr bufs\n"); "rcvhdrq and/or egr bufs\n");
......
...@@ -37,6 +37,7 @@ ...@@ -37,6 +37,7 @@
#include "ips_common.h" #include "ips_common.h"
#include "ipath_layer.h" #include "ipath_layer.h"
/* These are all rcv-related errors which we want to count for stats */
#define E_SUM_PKTERRS \ #define E_SUM_PKTERRS \
(INFINIPATH_E_RHDRLEN | INFINIPATH_E_RBADTID | \ (INFINIPATH_E_RHDRLEN | INFINIPATH_E_RBADTID | \
INFINIPATH_E_RBADVERSION | INFINIPATH_E_RHDR | \ INFINIPATH_E_RBADVERSION | INFINIPATH_E_RHDR | \
...@@ -45,6 +46,7 @@ ...@@ -45,6 +46,7 @@
INFINIPATH_E_RFORMATERR | INFINIPATH_E_RUNSUPVL | \ INFINIPATH_E_RFORMATERR | INFINIPATH_E_RUNSUPVL | \
INFINIPATH_E_RUNEXPCHAR | INFINIPATH_E_REBP) INFINIPATH_E_RUNEXPCHAR | INFINIPATH_E_REBP)
/* These are all send-related errors which we want to count for stats */
#define E_SUM_ERRS \ #define E_SUM_ERRS \
(INFINIPATH_E_SPIOARMLAUNCH | INFINIPATH_E_SUNEXPERRPKTNUM | \ (INFINIPATH_E_SPIOARMLAUNCH | INFINIPATH_E_SUNEXPERRPKTNUM | \
INFINIPATH_E_SDROPPEDDATAPKT | INFINIPATH_E_SDROPPEDSMPPKT | \ INFINIPATH_E_SDROPPEDDATAPKT | INFINIPATH_E_SDROPPEDSMPPKT | \
...@@ -52,6 +54,18 @@ ...@@ -52,6 +54,18 @@
INFINIPATH_E_SMINPKTLEN | INFINIPATH_E_SPKTLEN | \ INFINIPATH_E_SMINPKTLEN | INFINIPATH_E_SPKTLEN | \
INFINIPATH_E_INVALIDADDR) INFINIPATH_E_INVALIDADDR)
/*
* these are errors that can occur when the link changes state while
* a packet is being sent or received. This doesn't cover things
* like EBP or VCRC that can be the result of a sender having the
* link change state, so we receive a "known bad" packet.
*/
#define E_SUM_LINK_PKTERRS \
(INFINIPATH_E_SDROPPEDDATAPKT | INFINIPATH_E_SDROPPEDSMPPKT | \
INFINIPATH_E_SMINPKTLEN | INFINIPATH_E_SPKTLEN | \
INFINIPATH_E_RSHORTPKTLEN | INFINIPATH_E_RMINPKTLEN | \
INFINIPATH_E_RUNEXPCHAR)
static u64 handle_e_sum_errs(struct ipath_devdata *dd, ipath_err_t errs) static u64 handle_e_sum_errs(struct ipath_devdata *dd, ipath_err_t errs)
{ {
unsigned long sbuf[4]; unsigned long sbuf[4];
...@@ -101,9 +115,7 @@ static u64 handle_e_sum_errs(struct ipath_devdata *dd, ipath_err_t errs) ...@@ -101,9 +115,7 @@ static u64 handle_e_sum_errs(struct ipath_devdata *dd, ipath_err_t errs)
if (ipath_debug & __IPATH_PKTDBG) if (ipath_debug & __IPATH_PKTDBG)
printk("\n"); printk("\n");
} }
if ((errs & (INFINIPATH_E_SDROPPEDDATAPKT | if ((errs & E_SUM_LINK_PKTERRS) &&
INFINIPATH_E_SDROPPEDSMPPKT |
INFINIPATH_E_SMINPKTLEN)) &&
!(dd->ipath_flags & IPATH_LINKACTIVE)) { !(dd->ipath_flags & IPATH_LINKACTIVE)) {
/* /*
* This can happen when SMA is trying to bring the link * This can happen when SMA is trying to bring the link
...@@ -112,11 +124,9 @@ static u64 handle_e_sum_errs(struct ipath_devdata *dd, ipath_err_t errs) ...@@ -112,11 +124,9 @@ static u64 handle_e_sum_errs(struct ipath_devdata *dd, ipath_err_t errs)
* valid. We don't want to confuse people, so we just * valid. We don't want to confuse people, so we just
* don't print them, except at debug * don't print them, except at debug
*/ */
ipath_dbg("Ignoring pktsend errors %llx, because not " ipath_dbg("Ignoring packet errors %llx, because link not "
"yet active\n", (unsigned long long) errs); "ACTIVE\n", (unsigned long long) errs);
ignore_this_time = INFINIPATH_E_SDROPPEDDATAPKT | ignore_this_time = errs & E_SUM_LINK_PKTERRS;
INFINIPATH_E_SDROPPEDSMPPKT |
INFINIPATH_E_SMINPKTLEN;
} }
return ignore_this_time; return ignore_this_time;
...@@ -157,7 +167,29 @@ static void handle_e_ibstatuschanged(struct ipath_devdata *dd, ...@@ -157,7 +167,29 @@ static void handle_e_ibstatuschanged(struct ipath_devdata *dd,
*/ */
val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_ibcstatus); val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_ibcstatus);
lstate = val & IPATH_IBSTATE_MASK; lstate = val & IPATH_IBSTATE_MASK;
if (lstate == IPATH_IBSTATE_INIT || lstate == IPATH_IBSTATE_ARM ||
/*
* this is confusing enough when it happens that I want to always put it
* on the console and in the logs. If it was a requested state change,
* we'll have already cleared the flags, so we won't print this warning
*/
if ((lstate != IPATH_IBSTATE_ARM && lstate != IPATH_IBSTATE_ACTIVE)
&& (dd->ipath_flags & (IPATH_LINKARMED | IPATH_LINKACTIVE))) {
dev_info(&dd->pcidev->dev, "Link state changed from %s to %s\n",
(dd->ipath_flags & IPATH_LINKARMED) ? "ARM" : "ACTIVE",
ib_linkstate(lstate));
/*
* Flush all queued sends when link went to DOWN or INIT,
* to be sure that they don't block SMA and other MAD packets
*/
ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl,
INFINIPATH_S_ABORT);
ipath_disarm_piobufs(dd, dd->ipath_lastport_piobuf,
(unsigned)(dd->ipath_piobcnt2k +
dd->ipath_piobcnt4k) -
dd->ipath_lastport_piobuf);
}
else if (lstate == IPATH_IBSTATE_INIT || lstate == IPATH_IBSTATE_ARM ||
lstate == IPATH_IBSTATE_ACTIVE) { lstate == IPATH_IBSTATE_ACTIVE) {
/* /*
* only print at SMA if there is a change, debug if not * only print at SMA if there is a change, debug if not
...@@ -380,6 +412,19 @@ static void handle_errors(struct ipath_devdata *dd, ipath_err_t errs) ...@@ -380,6 +412,19 @@ static void handle_errors(struct ipath_devdata *dd, ipath_err_t errs)
if (errs & E_SUM_ERRS) if (errs & E_SUM_ERRS)
ignore_this_time = handle_e_sum_errs(dd, errs); ignore_this_time = handle_e_sum_errs(dd, errs);
else if ((errs & E_SUM_LINK_PKTERRS) &&
!(dd->ipath_flags & IPATH_LINKACTIVE)) {
/*
* This can happen when SMA is trying to bring the link
* up, but the IB link changes state at the "wrong" time.
* The IB logic then complains that the packet isn't
* valid. We don't want to confuse people, so we just
* don't print them, except at debug
*/
ipath_dbg("Ignoring packet errors %llx, because link not "
"ACTIVE\n", (unsigned long long) errs);
ignore_this_time = errs & E_SUM_LINK_PKTERRS;
}
if (supp_msgs == 250000) { if (supp_msgs == 250000) {
/* /*
......
...@@ -62,9 +62,7 @@ struct ipath_portdata { ...@@ -62,9 +62,7 @@ struct ipath_portdata {
/* rcvhdrq base, needs mmap before useful */ /* rcvhdrq base, needs mmap before useful */
void *port_rcvhdrq; void *port_rcvhdrq;
/* kernel virtual address where hdrqtail is updated */ /* kernel virtual address where hdrqtail is updated */
u64 *port_rcvhdrtail_kvaddr; volatile __le64 *port_rcvhdrtail_kvaddr;
/* page * used for uaddr */
struct page *port_rcvhdrtail_pagep;
/* /*
* temp buffer for expected send setup, allocated at open, instead * temp buffer for expected send setup, allocated at open, instead
* of each setup call * of each setup call
...@@ -79,11 +77,7 @@ struct ipath_portdata { ...@@ -79,11 +77,7 @@ struct ipath_portdata {
dma_addr_t port_rcvegr_phys; dma_addr_t port_rcvegr_phys;
/* mmap of hdrq, must fit in 44 bits */ /* mmap of hdrq, must fit in 44 bits */
dma_addr_t port_rcvhdrq_phys; dma_addr_t port_rcvhdrq_phys;
/* dma_addr_t port_rcvhdrqtailaddr_phys;
* the actual user address that we ipath_mlock'ed, so we can
* ipath_munlock it at close
*/
unsigned long port_rcvhdrtail_uaddr;
/* /*
* number of opens on this instance (0 or 1; ignoring forks, dup, * number of opens on this instance (0 or 1; ignoring forks, dup,
* etc. for now) * etc. for now)
...@@ -515,11 +509,6 @@ struct ipath_devdata { ...@@ -515,11 +509,6 @@ struct ipath_devdata {
u8 ipath_lmc; u8 ipath_lmc;
}; };
extern volatile __le64 *ipath_port0_rcvhdrtail;
extern dma_addr_t ipath_port0_rcvhdrtail_dma;
#define IPATH_PORT0_RCVHDRTAIL_SIZE PAGE_SIZE
extern struct list_head ipath_dev_list; extern struct list_head ipath_dev_list;
extern spinlock_t ipath_devs_lock; extern spinlock_t ipath_devs_lock;
extern struct ipath_devdata *ipath_lookup(int unit); extern struct ipath_devdata *ipath_lookup(int unit);
...@@ -579,7 +568,7 @@ void ipath_disarm_piobufs(struct ipath_devdata *, unsigned first, ...@@ -579,7 +568,7 @@ void ipath_disarm_piobufs(struct ipath_devdata *, unsigned first,
unsigned cnt); unsigned cnt);
int ipath_create_rcvhdrq(struct ipath_devdata *, struct ipath_portdata *); int ipath_create_rcvhdrq(struct ipath_devdata *, struct ipath_portdata *);
void ipath_free_pddata(struct ipath_devdata *, u32, int); void ipath_free_pddata(struct ipath_devdata *, struct ipath_portdata *);
int ipath_parse_ushort(const char *str, unsigned short *valp); int ipath_parse_ushort(const char *str, unsigned short *valp);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment