Commit 8e1bf9ff authored by Linus Torvalds's avatar Linus Torvalds

Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/roland/infiniband

* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/roland/infiniband:
  IB/ehca: Wait for async events to finish before destroying QP
  IB/ipath: Fix SDMA error recovery in absence of link status change
  IB/ipath: Need to always request and handle PIO avail interrupts
  IB/ipath: Fix count of packets received by kernel
  IB/ipath: Return the correct opcode for RDMA WRITE with immediate
  IB/ipath: Fix bug that can leave sends disabled after freeze recovery
  IB/ipath: Only increment SSN if WQE is put on send queue
  IB/ipath: Only warn about prototype chip during init
  RDMA/cxgb3: Fix severe limit on userspace memory registration size
  RDMA/cxgb3: Don't add PBL memory to gen_pool in chunks
parents 148c69b4 12137c59
......@@ -588,7 +588,7 @@ static int cxio_hal_destroy_ctrl_qp(struct cxio_rdev *rdev_p)
* caller aquires the ctrl_qp lock before the call
*/
static int cxio_hal_ctrl_qp_write_mem(struct cxio_rdev *rdev_p, u32 addr,
u32 len, void *data, int completion)
u32 len, void *data)
{
u32 i, nr_wqe, copy_len;
u8 *copy_data;
......@@ -624,7 +624,7 @@ static int cxio_hal_ctrl_qp_write_mem(struct cxio_rdev *rdev_p, u32 addr,
flag = 0;
if (i == (nr_wqe - 1)) {
/* last WQE */
flag = completion ? T3_COMPLETION_FLAG : 0;
flag = T3_COMPLETION_FLAG;
if (len % 32)
utx_len = len / 32 + 1;
else
......@@ -683,21 +683,20 @@ static int cxio_hal_ctrl_qp_write_mem(struct cxio_rdev *rdev_p, u32 addr,
return 0;
}
/* IN: stag key, pdid, perm, zbva, to, len, page_size, pbl, and pbl_size
* OUT: stag index, actual pbl_size, pbl_addr allocated.
/* IN: stag key, pdid, perm, zbva, to, len, page_size, pbl_size and pbl_addr
* OUT: stag index
* TBD: shared memory region support
*/
static int __cxio_tpt_op(struct cxio_rdev *rdev_p, u32 reset_tpt_entry,
u32 *stag, u8 stag_state, u32 pdid,
enum tpt_mem_type type, enum tpt_mem_perm perm,
u32 zbva, u64 to, u32 len, u8 page_size, __be64 *pbl,
u32 *pbl_size, u32 *pbl_addr)
u32 zbva, u64 to, u32 len, u8 page_size,
u32 pbl_size, u32 pbl_addr)
{
int err;
struct tpt_entry tpt;
u32 stag_idx;
u32 wptr;
int rereg = (*stag != T3_STAG_UNSET);
stag_state = stag_state > 0;
stag_idx = (*stag) >> 8;
......@@ -711,30 +710,8 @@ static int __cxio_tpt_op(struct cxio_rdev *rdev_p, u32 reset_tpt_entry,
PDBG("%s stag_state 0x%0x type 0x%0x pdid 0x%0x, stag_idx 0x%x\n",
__func__, stag_state, type, pdid, stag_idx);
if (reset_tpt_entry)
cxio_hal_pblpool_free(rdev_p, *pbl_addr, *pbl_size << 3);
else if (!rereg) {
*pbl_addr = cxio_hal_pblpool_alloc(rdev_p, *pbl_size << 3);
if (!*pbl_addr) {
return -ENOMEM;
}
}
mutex_lock(&rdev_p->ctrl_qp.lock);
/* write PBL first if any - update pbl only if pbl list exist */
if (pbl) {
PDBG("%s *pdb_addr 0x%x, pbl_base 0x%x, pbl_size %d\n",
__func__, *pbl_addr, rdev_p->rnic_info.pbl_base,
*pbl_size);
err = cxio_hal_ctrl_qp_write_mem(rdev_p,
(*pbl_addr >> 5),
(*pbl_size << 3), pbl, 0);
if (err)
goto ret;
}
/* write TPT entry */
if (reset_tpt_entry)
memset(&tpt, 0, sizeof(tpt));
......@@ -749,23 +726,23 @@ static int __cxio_tpt_op(struct cxio_rdev *rdev_p, u32 reset_tpt_entry,
V_TPT_ADDR_TYPE((zbva ? TPT_ZBTO : TPT_VATO)) |
V_TPT_PAGE_SIZE(page_size));
tpt.rsvd_pbl_addr = reset_tpt_entry ? 0 :
cpu_to_be32(V_TPT_PBL_ADDR(PBL_OFF(rdev_p, *pbl_addr)>>3));
cpu_to_be32(V_TPT_PBL_ADDR(PBL_OFF(rdev_p, pbl_addr)>>3));
tpt.len = cpu_to_be32(len);
tpt.va_hi = cpu_to_be32((u32) (to >> 32));
tpt.va_low_or_fbo = cpu_to_be32((u32) (to & 0xFFFFFFFFULL));
tpt.rsvd_bind_cnt_or_pstag = 0;
tpt.rsvd_pbl_size = reset_tpt_entry ? 0 :
cpu_to_be32(V_TPT_PBL_SIZE((*pbl_size) >> 2));
cpu_to_be32(V_TPT_PBL_SIZE(pbl_size >> 2));
}
err = cxio_hal_ctrl_qp_write_mem(rdev_p,
stag_idx +
(rdev_p->rnic_info.tpt_base >> 5),
sizeof(tpt), &tpt, 1);
sizeof(tpt), &tpt);
/* release the stag index to free pool */
if (reset_tpt_entry)
cxio_hal_put_stag(rdev_p->rscp, stag_idx);
ret:
wptr = rdev_p->ctrl_qp.wptr;
mutex_unlock(&rdev_p->ctrl_qp.lock);
if (!err)
......@@ -776,44 +753,67 @@ ret:
return err;
}
int cxio_write_pbl(struct cxio_rdev *rdev_p, __be64 *pbl,
u32 pbl_addr, u32 pbl_size)
{
u32 wptr;
int err;
PDBG("%s *pdb_addr 0x%x, pbl_base 0x%x, pbl_size %d\n",
__func__, pbl_addr, rdev_p->rnic_info.pbl_base,
pbl_size);
mutex_lock(&rdev_p->ctrl_qp.lock);
err = cxio_hal_ctrl_qp_write_mem(rdev_p, pbl_addr >> 5, pbl_size << 3,
pbl);
wptr = rdev_p->ctrl_qp.wptr;
mutex_unlock(&rdev_p->ctrl_qp.lock);
if (err)
return err;
if (wait_event_interruptible(rdev_p->ctrl_qp.waitq,
SEQ32_GE(rdev_p->ctrl_qp.rptr,
wptr)))
return -ERESTARTSYS;
return 0;
}
int cxio_register_phys_mem(struct cxio_rdev *rdev_p, u32 *stag, u32 pdid,
enum tpt_mem_perm perm, u32 zbva, u64 to, u32 len,
u8 page_size, __be64 *pbl, u32 *pbl_size,
u32 *pbl_addr)
u8 page_size, u32 pbl_size, u32 pbl_addr)
{
*stag = T3_STAG_UNSET;
return __cxio_tpt_op(rdev_p, 0, stag, 1, pdid, TPT_NON_SHARED_MR, perm,
zbva, to, len, page_size, pbl, pbl_size, pbl_addr);
zbva, to, len, page_size, pbl_size, pbl_addr);
}
int cxio_reregister_phys_mem(struct cxio_rdev *rdev_p, u32 *stag, u32 pdid,
enum tpt_mem_perm perm, u32 zbva, u64 to, u32 len,
u8 page_size, __be64 *pbl, u32 *pbl_size,
u32 *pbl_addr)
u8 page_size, u32 pbl_size, u32 pbl_addr)
{
return __cxio_tpt_op(rdev_p, 0, stag, 1, pdid, TPT_NON_SHARED_MR, perm,
zbva, to, len, page_size, pbl, pbl_size, pbl_addr);
zbva, to, len, page_size, pbl_size, pbl_addr);
}
int cxio_dereg_mem(struct cxio_rdev *rdev_p, u32 stag, u32 pbl_size,
u32 pbl_addr)
{
return __cxio_tpt_op(rdev_p, 1, &stag, 0, 0, 0, 0, 0, 0ULL, 0, 0, NULL,
&pbl_size, &pbl_addr);
return __cxio_tpt_op(rdev_p, 1, &stag, 0, 0, 0, 0, 0, 0ULL, 0, 0,
pbl_size, pbl_addr);
}
int cxio_allocate_window(struct cxio_rdev *rdev_p, u32 * stag, u32 pdid)
{
u32 pbl_size = 0;
*stag = T3_STAG_UNSET;
return __cxio_tpt_op(rdev_p, 0, stag, 0, pdid, TPT_MW, 0, 0, 0ULL, 0, 0,
NULL, &pbl_size, NULL);
0, 0);
}
int cxio_deallocate_window(struct cxio_rdev *rdev_p, u32 stag)
{
return __cxio_tpt_op(rdev_p, 1, &stag, 0, 0, 0, 0, 0, 0ULL, 0, 0, NULL,
NULL, NULL);
return __cxio_tpt_op(rdev_p, 1, &stag, 0, 0, 0, 0, 0, 0ULL, 0, 0,
0, 0);
}
int cxio_rdma_init(struct cxio_rdev *rdev_p, struct t3_rdma_init_attr *attr)
......
......@@ -154,14 +154,14 @@ int cxio_create_qp(struct cxio_rdev *rdev, u32 kernel_domain, struct t3_wq *wq,
int cxio_destroy_qp(struct cxio_rdev *rdev, struct t3_wq *wq,
struct cxio_ucontext *uctx);
int cxio_peek_cq(struct t3_wq *wr, struct t3_cq *cq, int opcode);
int cxio_write_pbl(struct cxio_rdev *rdev_p, __be64 *pbl,
u32 pbl_addr, u32 pbl_size);
int cxio_register_phys_mem(struct cxio_rdev *rdev, u32 * stag, u32 pdid,
enum tpt_mem_perm perm, u32 zbva, u64 to, u32 len,
u8 page_size, __be64 *pbl, u32 *pbl_size,
u32 *pbl_addr);
u8 page_size, u32 pbl_size, u32 pbl_addr);
int cxio_reregister_phys_mem(struct cxio_rdev *rdev, u32 * stag, u32 pdid,
enum tpt_mem_perm perm, u32 zbva, u64 to, u32 len,
u8 page_size, __be64 *pbl, u32 *pbl_size,
u32 *pbl_addr);
u8 page_size, u32 pbl_size, u32 pbl_addr);
int cxio_dereg_mem(struct cxio_rdev *rdev, u32 stag, u32 pbl_size,
u32 pbl_addr);
int cxio_allocate_window(struct cxio_rdev *rdev, u32 * stag, u32 pdid);
......
......@@ -250,7 +250,6 @@ void cxio_hal_destroy_resource(struct cxio_hal_resource *rscp)
*/
#define MIN_PBL_SHIFT 8 /* 256B == min PBL size (32 entries) */
#define PBL_CHUNK 2*1024*1024
u32 cxio_hal_pblpool_alloc(struct cxio_rdev *rdev_p, int size)
{
......@@ -267,14 +266,35 @@ void cxio_hal_pblpool_free(struct cxio_rdev *rdev_p, u32 addr, int size)
int cxio_hal_pblpool_create(struct cxio_rdev *rdev_p)
{
unsigned long i;
unsigned pbl_start, pbl_chunk;
rdev_p->pbl_pool = gen_pool_create(MIN_PBL_SHIFT, -1);
if (rdev_p->pbl_pool)
for (i = rdev_p->rnic_info.pbl_base;
i <= rdev_p->rnic_info.pbl_top - PBL_CHUNK + 1;
i += PBL_CHUNK)
gen_pool_add(rdev_p->pbl_pool, i, PBL_CHUNK, -1);
return rdev_p->pbl_pool ? 0 : -ENOMEM;
if (!rdev_p->pbl_pool)
return -ENOMEM;
pbl_start = rdev_p->rnic_info.pbl_base;
pbl_chunk = rdev_p->rnic_info.pbl_top - pbl_start + 1;
while (pbl_start < rdev_p->rnic_info.pbl_top) {
pbl_chunk = min(rdev_p->rnic_info.pbl_top - pbl_start + 1,
pbl_chunk);
if (gen_pool_add(rdev_p->pbl_pool, pbl_start, pbl_chunk, -1)) {
PDBG("%s failed to add PBL chunk (%x/%x)\n",
__func__, pbl_start, pbl_chunk);
if (pbl_chunk <= 1024 << MIN_PBL_SHIFT) {
printk(KERN_WARNING MOD "%s: Failed to add all PBL chunks (%x/%x)\n",
__func__, pbl_start, rdev_p->rnic_info.pbl_top - pbl_start);
return 0;
}
pbl_chunk >>= 1;
} else {
PDBG("%s added PBL chunk (%x/%x)\n",
__func__, pbl_start, pbl_chunk);
pbl_start += pbl_chunk;
}
}
return 0;
}
void cxio_hal_pblpool_destroy(struct cxio_rdev *rdev_p)
......
......@@ -35,17 +35,26 @@
#include <rdma/ib_verbs.h>
#include "cxio_hal.h"
#include "cxio_resource.h"
#include "iwch.h"
#include "iwch_provider.h"
int iwch_register_mem(struct iwch_dev *rhp, struct iwch_pd *php,
struct iwch_mr *mhp,
int shift,
__be64 *page_list)
static void iwch_finish_mem_reg(struct iwch_mr *mhp, u32 stag)
{
u32 stag;
u32 mmid;
mhp->attr.state = 1;
mhp->attr.stag = stag;
mmid = stag >> 8;
mhp->ibmr.rkey = mhp->ibmr.lkey = stag;
insert_handle(mhp->rhp, &mhp->rhp->mmidr, mhp, mmid);
PDBG("%s mmid 0x%x mhp %p\n", __func__, mmid, mhp);
}
int iwch_register_mem(struct iwch_dev *rhp, struct iwch_pd *php,
struct iwch_mr *mhp, int shift)
{
u32 stag;
if (cxio_register_phys_mem(&rhp->rdev,
&stag, mhp->attr.pdid,
......@@ -53,28 +62,21 @@ int iwch_register_mem(struct iwch_dev *rhp, struct iwch_pd *php,
mhp->attr.zbva,
mhp->attr.va_fbo,
mhp->attr.len,
shift-12,
page_list,
&mhp->attr.pbl_size, &mhp->attr.pbl_addr))
shift - 12,
mhp->attr.pbl_size, mhp->attr.pbl_addr))
return -ENOMEM;
mhp->attr.state = 1;
mhp->attr.stag = stag;
mmid = stag >> 8;
mhp->ibmr.rkey = mhp->ibmr.lkey = stag;
insert_handle(rhp, &rhp->mmidr, mhp, mmid);
PDBG("%s mmid 0x%x mhp %p\n", __func__, mmid, mhp);
iwch_finish_mem_reg(mhp, stag);
return 0;
}
int iwch_reregister_mem(struct iwch_dev *rhp, struct iwch_pd *php,
struct iwch_mr *mhp,
int shift,
__be64 *page_list,
int npages)
{
u32 stag;
u32 mmid;
/* We could support this... */
if (npages > mhp->attr.pbl_size)
......@@ -87,19 +89,40 @@ int iwch_reregister_mem(struct iwch_dev *rhp, struct iwch_pd *php,
mhp->attr.zbva,
mhp->attr.va_fbo,
mhp->attr.len,
shift-12,
page_list,
&mhp->attr.pbl_size, &mhp->attr.pbl_addr))
shift - 12,
mhp->attr.pbl_size, mhp->attr.pbl_addr))
return -ENOMEM;
mhp->attr.state = 1;
mhp->attr.stag = stag;
mmid = stag >> 8;
mhp->ibmr.rkey = mhp->ibmr.lkey = stag;
insert_handle(rhp, &rhp->mmidr, mhp, mmid);
PDBG("%s mmid 0x%x mhp %p\n", __func__, mmid, mhp);
iwch_finish_mem_reg(mhp, stag);
return 0;
}
int iwch_alloc_pbl(struct iwch_mr *mhp, int npages)
{
mhp->attr.pbl_addr = cxio_hal_pblpool_alloc(&mhp->rhp->rdev,
npages << 3);
if (!mhp->attr.pbl_addr)
return -ENOMEM;
mhp->attr.pbl_size = npages;
return 0;
}
void iwch_free_pbl(struct iwch_mr *mhp)
{
cxio_hal_pblpool_free(&mhp->rhp->rdev, mhp->attr.pbl_addr,
mhp->attr.pbl_size << 3);
}
int iwch_write_pbl(struct iwch_mr *mhp, __be64 *pages, int npages, int offset)
{
return cxio_write_pbl(&mhp->rhp->rdev, pages,
mhp->attr.pbl_addr + (offset << 3), npages);
}
int build_phys_page_list(struct ib_phys_buf *buffer_list,
int num_phys_buf,
u64 *iova_start,
......
......@@ -442,6 +442,7 @@ static int iwch_dereg_mr(struct ib_mr *ib_mr)
mmid = mhp->attr.stag >> 8;
cxio_dereg_mem(&rhp->rdev, mhp->attr.stag, mhp->attr.pbl_size,
mhp->attr.pbl_addr);
iwch_free_pbl(mhp);
remove_handle(rhp, &rhp->mmidr, mmid);
if (mhp->kva)
kfree((void *) (unsigned long) mhp->kva);
......@@ -475,6 +476,8 @@ static struct ib_mr *iwch_register_phys_mem(struct ib_pd *pd,
if (!mhp)
return ERR_PTR(-ENOMEM);
mhp->rhp = rhp;
/* First check that we have enough alignment */
if ((*iova_start & ~PAGE_MASK) != (buffer_list[0].addr & ~PAGE_MASK)) {
ret = -EINVAL;
......@@ -492,7 +495,17 @@ static struct ib_mr *iwch_register_phys_mem(struct ib_pd *pd,
if (ret)
goto err;
mhp->rhp = rhp;
ret = iwch_alloc_pbl(mhp, npages);
if (ret) {
kfree(page_list);
goto err_pbl;
}
ret = iwch_write_pbl(mhp, page_list, npages, 0);
kfree(page_list);
if (ret)
goto err_pbl;
mhp->attr.pdid = php->pdid;
mhp->attr.zbva = 0;
......@@ -502,12 +515,15 @@ static struct ib_mr *iwch_register_phys_mem(struct ib_pd *pd,
mhp->attr.len = (u32) total_size;
mhp->attr.pbl_size = npages;
ret = iwch_register_mem(rhp, php, mhp, shift, page_list);
kfree(page_list);
if (ret) {
goto err;
}
ret = iwch_register_mem(rhp, php, mhp, shift);
if (ret)
goto err_pbl;
return &mhp->ibmr;
err_pbl:
iwch_free_pbl(mhp);
err:
kfree(mhp);
return ERR_PTR(ret);
......@@ -560,7 +576,7 @@ static int iwch_reregister_phys_mem(struct ib_mr *mr,
return ret;
}
ret = iwch_reregister_mem(rhp, php, &mh, shift, page_list, npages);
ret = iwch_reregister_mem(rhp, php, &mh, shift, npages);
kfree(page_list);
if (ret) {
return ret;
......@@ -602,6 +618,8 @@ static struct ib_mr *iwch_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
if (!mhp)
return ERR_PTR(-ENOMEM);
mhp->rhp = rhp;
mhp->umem = ib_umem_get(pd->uobject->context, start, length, acc, 0);
if (IS_ERR(mhp->umem)) {
err = PTR_ERR(mhp->umem);
......@@ -615,10 +633,14 @@ static struct ib_mr *iwch_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
list_for_each_entry(chunk, &mhp->umem->chunk_list, list)
n += chunk->nents;
pages = kmalloc(n * sizeof(u64), GFP_KERNEL);
err = iwch_alloc_pbl(mhp, n);
if (err)
goto err;
pages = (__be64 *) __get_free_page(GFP_KERNEL);
if (!pages) {
err = -ENOMEM;
goto err;
goto err_pbl;
}
i = n = 0;
......@@ -630,25 +652,38 @@ static struct ib_mr *iwch_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
pages[i++] = cpu_to_be64(sg_dma_address(
&chunk->page_list[j]) +
mhp->umem->page_size * k);
if (i == PAGE_SIZE / sizeof *pages) {
err = iwch_write_pbl(mhp, pages, i, n);
if (err)
goto pbl_done;
n += i;
i = 0;
}
}
}
mhp->rhp = rhp;
if (i)
err = iwch_write_pbl(mhp, pages, i, n);
pbl_done:
free_page((unsigned long) pages);
if (err)
goto err_pbl;
mhp->attr.pdid = php->pdid;
mhp->attr.zbva = 0;
mhp->attr.perms = iwch_ib_to_tpt_access(acc);
mhp->attr.va_fbo = virt;
mhp->attr.page_size = shift - 12;
mhp->attr.len = (u32) length;
mhp->attr.pbl_size = i;
err = iwch_register_mem(rhp, php, mhp, shift, pages);
kfree(pages);
err = iwch_register_mem(rhp, php, mhp, shift);
if (err)
goto err;
goto err_pbl;
if (udata && !t3a_device(rhp)) {
uresp.pbl_addr = (mhp->attr.pbl_addr -
rhp->rdev.rnic_info.pbl_base) >> 3;
rhp->rdev.rnic_info.pbl_base) >> 3;
PDBG("%s user resp pbl_addr 0x%x\n", __func__,
uresp.pbl_addr);
......@@ -661,6 +696,9 @@ static struct ib_mr *iwch_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
return &mhp->ibmr;
err_pbl:
iwch_free_pbl(mhp);
err:
ib_umem_release(mhp->umem);
kfree(mhp);
......
......@@ -340,14 +340,14 @@ int iwch_quiesce_qps(struct iwch_cq *chp);
int iwch_resume_qps(struct iwch_cq *chp);
void stop_read_rep_timer(struct iwch_qp *qhp);
int iwch_register_mem(struct iwch_dev *rhp, struct iwch_pd *php,
struct iwch_mr *mhp,
int shift,
__be64 *page_list);
struct iwch_mr *mhp, int shift);
int iwch_reregister_mem(struct iwch_dev *rhp, struct iwch_pd *php,
struct iwch_mr *mhp,
int shift,
__be64 *page_list,
int npages);
int iwch_alloc_pbl(struct iwch_mr *mhp, int npages);
void iwch_free_pbl(struct iwch_mr *mhp);
int iwch_write_pbl(struct iwch_mr *mhp, __be64 *pages, int npages, int offset);
int build_phys_page_list(struct ib_phys_buf *buffer_list,
int num_phys_buf,
u64 *iova_start,
......
......@@ -192,6 +192,8 @@ struct ehca_qp {
int mtu_shift;
u32 message_count;
u32 packet_count;
atomic_t nr_events; /* events seen */
wait_queue_head_t wait_completion;
};
#define IS_SRQ(qp) (qp->ext_type == EQPT_SRQ)
......
......@@ -204,6 +204,8 @@ static void qp_event_callback(struct ehca_shca *shca, u64 eqe,
read_lock(&ehca_qp_idr_lock);
qp = idr_find(&ehca_qp_idr, token);
if (qp)
atomic_inc(&qp->nr_events);
read_unlock(&ehca_qp_idr_lock);
if (!qp)
......@@ -223,6 +225,8 @@ static void qp_event_callback(struct ehca_shca *shca, u64 eqe,
if (fatal && qp->ext_type == EQPT_SRQBASE)
dispatch_qp_event(shca, qp, IB_EVENT_QP_LAST_WQE_REACHED);
if (atomic_dec_and_test(&qp->nr_events))
wake_up(&qp->wait_completion);
return;
}
......
......@@ -566,6 +566,8 @@ static struct ehca_qp *internal_create_qp(
return ERR_PTR(-ENOMEM);
}
atomic_set(&my_qp->nr_events, 0);
init_waitqueue_head(&my_qp->wait_completion);
spin_lock_init(&my_qp->spinlock_s);
spin_lock_init(&my_qp->spinlock_r);
my_qp->qp_type = qp_type;
......@@ -1934,6 +1936,9 @@ static int internal_destroy_qp(struct ib_device *dev, struct ehca_qp *my_qp,
idr_remove(&ehca_qp_idr, my_qp->token);
write_unlock_irqrestore(&ehca_qp_idr_lock, flags);
/* now wait until all pending events have completed */
wait_event(my_qp->wait_completion, !atomic_read(&my_qp->nr_events));
h_ret = hipz_h_destroy_qp(shca->ipz_hca_handle, my_qp);
if (h_ret != H_SUCCESS) {
ehca_err(dev, "hipz_h_destroy_qp() failed h_ret=%li "
......
......@@ -1197,7 +1197,7 @@ void ipath_kreceive(struct ipath_portdata *pd)
}
reloop:
for (last = 0, i = 1; !last; i++) {
for (last = 0, i = 1; !last; i += !last) {
hdr = dd->ipath_f_get_msgheader(dd, rhf_addr);
eflags = ipath_hdrget_err_flags(rhf_addr);
etype = ipath_hdrget_rcv_type(rhf_addr);
......@@ -1428,6 +1428,40 @@ static void ipath_update_pio_bufs(struct ipath_devdata *dd)
spin_unlock_irqrestore(&ipath_pioavail_lock, flags);
}
/*
* used to force update of pioavailshadow if we can't get a pio buffer.
* Needed primarily due to exitting freeze mode after recovering
* from errors. Done lazily, because it's safer (known to not
* be writing pio buffers).
*/
static void ipath_reset_availshadow(struct ipath_devdata *dd)
{
int i, im;
unsigned long flags;
spin_lock_irqsave(&ipath_pioavail_lock, flags);
for (i = 0; i < dd->ipath_pioavregs; i++) {
u64 val, oldval;
/* deal with 6110 chip bug on high register #s */
im = (i > 3 && (dd->ipath_flags & IPATH_SWAP_PIOBUFS)) ?
i ^ 1 : i;
val = le64_to_cpu(dd->ipath_pioavailregs_dma[im]);
/*
* busy out the buffers not in the kernel avail list,
* without changing the generation bits.
*/
oldval = dd->ipath_pioavailshadow[i];
dd->ipath_pioavailshadow[i] = val |
((~dd->ipath_pioavailkernel[i] <<
INFINIPATH_SENDPIOAVAIL_BUSY_SHIFT) &
0xaaaaaaaaaaaaaaaaULL); /* All BUSY bits in qword */
if (oldval != dd->ipath_pioavailshadow[i])
ipath_dbg("shadow[%d] was %Lx, now %lx\n",
i, oldval, dd->ipath_pioavailshadow[i]);
}
spin_unlock_irqrestore(&ipath_pioavail_lock, flags);
}
/**
* ipath_setrcvhdrsize - set the receive header size
* @dd: the infinipath device
......@@ -1482,9 +1516,12 @@ static noinline void no_pio_bufs(struct ipath_devdata *dd)
*/
ipath_stats.sps_nopiobufs++;
if (!(++dd->ipath_consec_nopiobuf % 100000)) {
ipath_dbg("%u pio sends with no bufavail; dmacopy: "
"%llx %llx %llx %llx; shadow: %lx %lx %lx %lx\n",
ipath_force_pio_avail_update(dd); /* at start */
ipath_dbg("%u tries no piobufavail ts%lx; dmacopy: "
"%llx %llx %llx %llx\n"
"ipath shadow: %lx %lx %lx %lx\n",
dd->ipath_consec_nopiobuf,
(unsigned long)get_cycles(),
(unsigned long long) le64_to_cpu(dma[0]),
(unsigned long long) le64_to_cpu(dma[1]),
(unsigned long long) le64_to_cpu(dma[2]),
......@@ -1496,14 +1533,17 @@ static noinline void no_pio_bufs(struct ipath_devdata *dd)
*/
if ((dd->ipath_piobcnt2k + dd->ipath_piobcnt4k) >
(sizeof(shadow[0]) * 4 * 4))
ipath_dbg("2nd group: dmacopy: %llx %llx "
"%llx %llx; shadow: %lx %lx %lx %lx\n",
ipath_dbg("2nd group: dmacopy: "
"%llx %llx %llx %llx\n"
"ipath shadow: %lx %lx %lx %lx\n",
(unsigned long long)le64_to_cpu(dma[4]),
(unsigned long long)le64_to_cpu(dma[5]),
(unsigned long long)le64_to_cpu(dma[6]),
(unsigned long long)le64_to_cpu(dma[7]),
shadow[4], shadow[5], shadow[6],
shadow[7]);
shadow[4], shadow[5], shadow[6], shadow[7]);
/* at end, so update likely happened */
ipath_reset_availshadow(dd);
}
}
......@@ -1652,19 +1692,46 @@ void ipath_chg_pioavailkernel(struct ipath_devdata *dd, unsigned start,
unsigned len, int avail)
{
unsigned long flags;
unsigned end;
unsigned end, cnt = 0, next;
/* There are two bits per send buffer (busy and generation) */
start *= 2;
len *= 2;
end = start + len;
end = start + len * 2;
/* Set or clear the generation bits. */
spin_lock_irqsave(&ipath_pioavail_lock, flags);
/* Set or clear the busy bit in the shadow. */
while (start < end) {
if (avail) {
__clear_bit(start + INFINIPATH_SENDPIOAVAIL_BUSY_SHIFT,
dd->ipath_pioavailshadow);
unsigned long dma;
int i, im;
/*
* the BUSY bit will never be set, because we disarm
* the user buffers before we hand them back to the
* kernel. We do have to make sure the generation
* bit is set correctly in shadow, since it could
* have changed many times while allocated to user.
* We can't use the bitmap functions on the full
* dma array because it is always little-endian, so
* we have to flip to host-order first.
* BITS_PER_LONG is slightly wrong, since it's
* always 64 bits per register in chip...
* We only work on 64 bit kernels, so that's OK.
*/
/* deal with 6110 chip bug on high register #s */
i = start / BITS_PER_LONG;
im = (i > 3 && (dd->ipath_flags & IPATH_SWAP_PIOBUFS)) ?
i ^ 1 : i;
__clear_bit(INFINIPATH_SENDPIOAVAIL_BUSY_SHIFT
+ start, dd->ipath_pioavailshadow);
dma = (unsigned long) le64_to_cpu(
dd->ipath_pioavailregs_dma[im]);
if (test_bit((INFINIPATH_SENDPIOAVAIL_CHECK_SHIFT
+ start) % BITS_PER_LONG, &dma))
__set_bit(INFINIPATH_SENDPIOAVAIL_CHECK_SHIFT
+ start, dd->ipath_pioavailshadow);
else
__clear_bit(INFINIPATH_SENDPIOAVAIL_CHECK_SHIFT
+ start, dd->ipath_pioavailshadow);
__set_bit(start, dd->ipath_pioavailkernel);
} else {
__set_bit(start + INFINIPATH_SENDPIOAVAIL_BUSY_SHIFT,
......@@ -1673,7 +1740,44 @@ void ipath_chg_pioavailkernel(struct ipath_devdata *dd, unsigned start,
}
start += 2;
}
if (dd->ipath_pioupd_thresh) {
end = 2 * (dd->ipath_piobcnt2k + dd->ipath_piobcnt4k);
next = find_first_bit(dd->ipath_pioavailkernel, end);
while (next < end) {
cnt++;
next = find_next_bit(dd->ipath_pioavailkernel, end,
next + 1);
}
}
spin_unlock_irqrestore(&ipath_pioavail_lock, flags);
/*
* When moving buffers from kernel to user, if number assigned to
* the user is less than the pio update threshold, and threshold
* is supported (cnt was computed > 0), drop the update threshold
* so we update at least once per allocated number of buffers.
* In any case, if the kernel buffers are less than the threshold,
* drop the threshold. We don't bother increasing it, having once
* decreased it, since it would typically just cycle back and forth.
* If we don't decrease below buffers in use, we can wait a long
* time for an update, until some other context uses PIO buffers.
*/
if (!avail && len < cnt)
cnt = len;
if (cnt < dd->ipath_pioupd_thresh) {
dd->ipath_pioupd_thresh = cnt;
ipath_dbg("Decreased pio update threshold to %u\n",
dd->ipath_pioupd_thresh);
spin_lock_irqsave(&dd->ipath_sendctrl_lock, flags);
dd->ipath_sendctrl &= ~(INFINIPATH_S_UPDTHRESH_MASK
<< INFINIPATH_S_UPDTHRESH_SHIFT);
dd->ipath_sendctrl |= dd->ipath_pioupd_thresh
<< INFINIPATH_S_UPDTHRESH_SHIFT;
ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl,
dd->ipath_sendctrl);
spin_unlock_irqrestore(&dd->ipath_sendctrl_lock, flags);
}
}
/**
......@@ -1794,8 +1898,8 @@ void ipath_cancel_sends(struct ipath_devdata *dd, int restore_sendctrl)
spin_lock_irqsave(&dd->ipath_sdma_lock, flags);
skip_cancel =
!test_bit(IPATH_SDMA_DISABLED, statp) &&
test_and_set_bit(IPATH_SDMA_ABORTING, statp);
test_and_set_bit(IPATH_SDMA_ABORTING, statp)
&& !test_bit(IPATH_SDMA_DISABLED, statp);
spin_unlock_irqrestore(&dd->ipath_sdma_lock, flags);
if (skip_cancel)
goto bail;
......@@ -1826,6 +1930,9 @@ void ipath_cancel_sends(struct ipath_devdata *dd, int restore_sendctrl)
ipath_disarm_piobufs(dd, 0,
dd->ipath_piobcnt2k + dd->ipath_piobcnt4k);
if (dd->ipath_flags & IPATH_HAS_SEND_DMA)
set_bit(IPATH_SDMA_DISARMED, &dd->ipath_sdma_status);
if (restore_sendctrl) {
/* else done by caller later if needed */
spin_lock_irqsave(&dd->ipath_sendctrl_lock, flags);
......@@ -1845,7 +1952,6 @@ void ipath_cancel_sends(struct ipath_devdata *dd, int restore_sendctrl)
/* only wait so long for intr */
dd->ipath_sdma_abort_intr_timeout = jiffies + HZ;
dd->ipath_sdma_reset_wait = 200;
__set_bit(IPATH_SDMA_DISARMED, &dd->ipath_sdma_status);
if (!test_bit(IPATH_SDMA_SHUTDOWN, &dd->ipath_sdma_status))
tasklet_hi_schedule(&dd->ipath_sdma_abort_task);
spin_unlock_irqrestore(&dd->ipath_sdma_lock, flags);
......
......@@ -173,47 +173,25 @@ static int ipath_get_base_info(struct file *fp,
(void *) dd->ipath_statusp -
(void *) dd->ipath_pioavailregs_dma;
if (!shared) {
kinfo->spi_piocnt = dd->ipath_pbufsport;
kinfo->spi_piocnt = pd->port_piocnt;
kinfo->spi_piobufbase = (u64) pd->port_piobufs;
kinfo->__spi_uregbase = (u64) dd->ipath_uregbase +
dd->ipath_ureg_align * pd->port_port;
} else if (master) {
kinfo->spi_piocnt = (dd->ipath_pbufsport / subport_cnt) +
(dd->ipath_pbufsport % subport_cnt);
kinfo->spi_piocnt = (pd->port_piocnt / subport_cnt) +
(pd->port_piocnt % subport_cnt);
/* Master's PIO buffers are after all the slave's */
kinfo->spi_piobufbase = (u64) pd->port_piobufs +
dd->ipath_palign *
(dd->ipath_pbufsport - kinfo->spi_piocnt);
(pd->port_piocnt - kinfo->spi_piocnt);
} else {
unsigned slave = subport_fp(fp) - 1;
kinfo->spi_piocnt = dd->ipath_pbufsport / subport_cnt;
kinfo->spi_piocnt = pd->port_piocnt / subport_cnt;
kinfo->spi_piobufbase = (u64) pd->port_piobufs +
dd->ipath_palign * kinfo->spi_piocnt * slave;
}
/*
* Set the PIO avail update threshold to no larger
* than the number of buffers per process. Note that
* we decrease it here, but won't ever increase it.
*/
if (dd->ipath_pioupd_thresh &&
kinfo->spi_piocnt < dd->ipath_pioupd_thresh) {
unsigned long flags;
dd->ipath_pioupd_thresh = kinfo->spi_piocnt;
ipath_dbg("Decreased pio update threshold to %u\n",
dd->ipath_pioupd_thresh);
spin_lock_irqsave(&dd->ipath_sendctrl_lock, flags);
dd->ipath_sendctrl &= ~(INFINIPATH_S_UPDTHRESH_MASK
<< INFINIPATH_S_UPDTHRESH_SHIFT);
dd->ipath_sendctrl |= dd->ipath_pioupd_thresh
<< INFINIPATH_S_UPDTHRESH_SHIFT;
ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl,
dd->ipath_sendctrl);
spin_unlock_irqrestore(&dd->ipath_sendctrl_lock, flags);
}
if (shared) {
kinfo->spi_port_uregbase = (u64) dd->ipath_uregbase +
dd->ipath_ureg_align * pd->port_port;
......@@ -1309,19 +1287,19 @@ static int ipath_mmap(struct file *fp, struct vm_area_struct *vma)
ureg = dd->ipath_uregbase + dd->ipath_ureg_align * pd->port_port;
if (!pd->port_subport_cnt) {
/* port is not shared */
piocnt = dd->ipath_pbufsport;
piocnt = pd->port_piocnt;
piobufs = pd->port_piobufs;
} else if (!subport_fp(fp)) {
/* caller is the master */
piocnt = (dd->ipath_pbufsport / pd->port_subport_cnt) +
(dd->ipath_pbufsport % pd->port_subport_cnt);
piocnt = (pd->port_piocnt / pd->port_subport_cnt) +
(pd->port_piocnt % pd->port_subport_cnt);
piobufs = pd->port_piobufs +
dd->ipath_palign * (dd->ipath_pbufsport - piocnt);
dd->ipath_palign * (pd->port_piocnt - piocnt);
} else {
unsigned slave = subport_fp(fp) - 1;
/* caller is a slave */
piocnt = dd->ipath_pbufsport / pd->port_subport_cnt;
piocnt = pd->port_piocnt / pd->port_subport_cnt;
piobufs = pd->port_piobufs + dd->ipath_palign * piocnt * slave;
}
......@@ -1633,9 +1611,6 @@ static int try_alloc_port(struct ipath_devdata *dd, int port,
port_fp(fp) = pd;
pd->port_pid = current->pid;
strncpy(pd->port_comm, current->comm, sizeof(pd->port_comm));
ipath_chg_pioavailkernel(dd,
dd->ipath_pbufsport * (pd->port_port - 1),
dd->ipath_pbufsport, 0);
ipath_stats.sps_ports++;
ret = 0;
} else
......@@ -1938,11 +1913,25 @@ static int ipath_do_user_init(struct file *fp,
/* for now we do nothing with rcvhdrcnt: uinfo->spu_rcvhdrcnt */
/* some ports may get extra buffers, calculate that here */
if (pd->port_port <= dd->ipath_ports_extrabuf)
pd->port_piocnt = dd->ipath_pbufsport + 1;
else
pd->port_piocnt = dd->ipath_pbufsport;
/* for right now, kernel piobufs are at end, so port 1 is at 0 */
if (pd->port_port <= dd->ipath_ports_extrabuf)
pd->port_pio_base = (dd->ipath_pbufsport + 1)
* (pd->port_port - 1);
else
pd->port_pio_base = dd->ipath_ports_extrabuf +
dd->ipath_pbufsport * (pd->port_port - 1);
pd->port_piobufs = dd->ipath_piobufbase +
dd->ipath_pbufsport * (pd->port_port - 1) * dd->ipath_palign;
ipath_cdbg(VERBOSE, "Set base of piobufs for port %u to 0x%x\n",
pd->port_port, pd->port_piobufs);
pd->port_pio_base * dd->ipath_palign;
ipath_cdbg(VERBOSE, "piobuf base for port %u is 0x%x, piocnt %u,"
" first pio %u\n", pd->port_port, pd->port_piobufs,
pd->port_piocnt, pd->port_pio_base);
ipath_chg_pioavailkernel(dd, pd->port_pio_base, pd->port_piocnt, 0);
/*
* Now allocate the rcvhdr Q and eager TIDs; skip the TID
......@@ -2107,7 +2096,6 @@ static int ipath_close(struct inode *in, struct file *fp)
}
if (dd->ipath_kregbase) {
int i;
/* atomically clear receive enable port and intr avail. */
clear_bit(dd->ipath_r_portenable_shift + port,
&dd->ipath_rcvctrl);
......@@ -2136,9 +2124,9 @@ static int ipath_close(struct inode *in, struct file *fp)
ipath_write_kreg_port(dd, dd->ipath_kregs->kr_rcvhdraddr,
pd->port_port, dd->ipath_dummy_hdrq_phys);
i = dd->ipath_pbufsport * (port - 1);
ipath_disarm_piobufs(dd, i, dd->ipath_pbufsport);
ipath_chg_pioavailkernel(dd, i, dd->ipath_pbufsport, 1);
ipath_disarm_piobufs(dd, pd->port_pio_base, pd->port_piocnt);
ipath_chg_pioavailkernel(dd, pd->port_pio_base,
pd->port_piocnt, 1);
dd->ipath_f_clear_tids(dd, pd->port_port);
......
......@@ -595,7 +595,7 @@ static void ipath_7220_txe_recover(struct ipath_devdata *dd)
dev_info(&dd->pcidev->dev,
"Recovering from TXE PIO parity error\n");
ipath_disarm_senderrbufs(dd, 1);
ipath_disarm_senderrbufs(dd);
}
......@@ -675,10 +675,8 @@ static void ipath_7220_handle_hwerrors(struct ipath_devdata *dd, char *msg,
ctrl = ipath_read_kreg32(dd, dd->ipath_kregs->kr_control);
if ((ctrl & INFINIPATH_C_FREEZEMODE) && !ipath_diag_inuse) {
/*
* Parity errors in send memory are recoverable,
* just cancel the send (if indicated in * sendbuffererror),
* count the occurrence, unfreeze (if no other handled
* hardware error bits are set), and continue.
* Parity errors in send memory are recoverable by h/w
* just do housekeeping, exit freeze mode and continue.
*/
if (hwerrs & ((INFINIPATH_HWE_TXEMEMPARITYERR_PIOBUF |
INFINIPATH_HWE_TXEMEMPARITYERR_PIOPBC)
......@@ -687,13 +685,6 @@ static void ipath_7220_handle_hwerrors(struct ipath_devdata *dd, char *msg,
hwerrs &= ~((INFINIPATH_HWE_TXEMEMPARITYERR_PIOBUF |
INFINIPATH_HWE_TXEMEMPARITYERR_PIOPBC)
<< INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT);
if (!hwerrs) {
/* else leave in freeze mode */
ipath_write_kreg(dd,
dd->ipath_kregs->kr_control,
dd->ipath_control);
goto bail;
}
}
if (hwerrs) {
/*
......@@ -723,8 +714,8 @@ static void ipath_7220_handle_hwerrors(struct ipath_devdata *dd, char *msg,
*dd->ipath_statusp |= IPATH_STATUS_HWERROR;
dd->ipath_flags &= ~IPATH_INITTED;
} else {
ipath_dbg("Clearing freezemode on ignored hardware "
"error\n");
ipath_dbg("Clearing freezemode on ignored or "
"recovered hardware error\n");
ipath_clear_freeze(dd);
}
}
......@@ -870,8 +861,9 @@ static int ipath_7220_boardname(struct ipath_devdata *dd, char *name,
"revision %u.%u!\n",
dd->ipath_majrev, dd->ipath_minrev);
ret = 1;
} else if (dd->ipath_minrev == 1) {
/* Rev1 chips are prototype. Complain, but allow use */
} else if (dd->ipath_minrev == 1 &&
!(dd->ipath_flags & IPATH_INITTED)) {
/* Rev1 chips are prototype. Complain at init, but allow use */
ipath_dev_err(dd, "Unsupported hardware "
"revision %u.%u, Contact support@qlogic.com\n",
dd->ipath_majrev, dd->ipath_minrev);
......@@ -1966,7 +1958,7 @@ static void ipath_7220_config_ports(struct ipath_devdata *dd, ushort cfgports)
dd->ipath_rcvctrl);
dd->ipath_p0_rcvegrcnt = 2048; /* always */
if (dd->ipath_flags & IPATH_HAS_SEND_DMA)
dd->ipath_pioreserved = 1; /* reserve a buffer */
dd->ipath_pioreserved = 3; /* kpiobufs used for PIO */
}
......
......@@ -41,7 +41,7 @@
/*
* min buffers we want to have per port, after driver
*/
#define IPATH_MIN_USER_PORT_BUFCNT 8
#define IPATH_MIN_USER_PORT_BUFCNT 7
/*
* Number of ports we are configured to use (to allow for more pio
......@@ -54,13 +54,9 @@ MODULE_PARM_DESC(cfgports, "Set max number of ports to use");
/*
* Number of buffers reserved for driver (verbs and layered drivers.)
* Reserved at end of buffer list. Initialized based on
* number of PIO buffers if not set via module interface.
* Initialized based on number of PIO buffers if not set via module interface.
* The problem with this is that it's global, but we'll use different
* numbers for different chip types. So the default value is not
* very useful. I've redefined it for the 1.3 release so that it's
* zero unless set by the user to something else, in which case we
* try to respect it.
* numbers for different chip types.
*/
static ushort ipath_kpiobufs;
......@@ -546,9 +542,12 @@ static void enable_chip(struct ipath_devdata *dd, int reinit)
pioavail = dd->ipath_pioavailregs_dma[i ^ 1];
else
pioavail = dd->ipath_pioavailregs_dma[i];
dd->ipath_pioavailshadow[i] = le64_to_cpu(pioavail) |
(~dd->ipath_pioavailkernel[i] <<
INFINIPATH_SENDPIOAVAIL_BUSY_SHIFT);
/*
* don't need to worry about ipath_pioavailkernel here
* because we will call ipath_chg_pioavailkernel() later
* in initialization, to busy out buffers as needed
*/
dd->ipath_pioavailshadow[i] = le64_to_cpu(pioavail);
}
/* can get counters, stats, etc. */
dd->ipath_flags |= IPATH_PRESENT;
......@@ -708,12 +707,11 @@ static void verify_interrupt(unsigned long opaque)
int ipath_init_chip(struct ipath_devdata *dd, int reinit)
{
int ret = 0;
u32 val32, kpiobufs;
u32 kpiobufs, defkbufs;
u32 piobufs, uports;
u64 val;
struct ipath_portdata *pd;
gfp_t gfp_flags = GFP_USER | __GFP_COMP;
unsigned long flags;
ret = init_housekeeping(dd, reinit);
if (ret)
......@@ -753,69 +751,52 @@ int ipath_init_chip(struct ipath_devdata *dd, int reinit)
dd->ipath_pioavregs = ALIGN(piobufs, sizeof(u64) * BITS_PER_BYTE / 2)
/ (sizeof(u64) * BITS_PER_BYTE / 2);
uports = dd->ipath_cfgports ? dd->ipath_cfgports - 1 : 0;
if (ipath_kpiobufs == 0) {
/* not set by user (this is default) */
if (piobufs > 144)
kpiobufs = 32;
else
kpiobufs = 16;
}
if (piobufs > 144)
defkbufs = 32 + dd->ipath_pioreserved;
else
kpiobufs = ipath_kpiobufs;
defkbufs = 16 + dd->ipath_pioreserved;
if (kpiobufs + (uports * IPATH_MIN_USER_PORT_BUFCNT) > piobufs) {
if (ipath_kpiobufs && (ipath_kpiobufs +
(uports * IPATH_MIN_USER_PORT_BUFCNT)) > piobufs) {
int i = (int) piobufs -
(int) (uports * IPATH_MIN_USER_PORT_BUFCNT);
if (i < 1)
i = 1;
dev_info(&dd->pcidev->dev, "Allocating %d PIO bufs of "
"%d for kernel leaves too few for %d user ports "
"(%d each); using %u\n", kpiobufs,
"(%d each); using %u\n", ipath_kpiobufs,
piobufs, uports, IPATH_MIN_USER_PORT_BUFCNT, i);
/*
* shouldn't change ipath_kpiobufs, because could be
* different for different devices...
*/
kpiobufs = i;
}
} else if (ipath_kpiobufs)
kpiobufs = ipath_kpiobufs;
else
kpiobufs = defkbufs;
dd->ipath_lastport_piobuf = piobufs - kpiobufs;
dd->ipath_pbufsport =
uports ? dd->ipath_lastport_piobuf / uports : 0;
val32 = dd->ipath_lastport_piobuf - (dd->ipath_pbufsport * uports);
if (val32 > 0) {
ipath_dbg("allocating %u pbufs/port leaves %u unused, "
"add to kernel\n", dd->ipath_pbufsport, val32);
dd->ipath_lastport_piobuf -= val32;
kpiobufs += val32;
ipath_dbg("%u pbufs/port leaves %u unused, add to kernel\n",
dd->ipath_pbufsport, val32);
}
/* if not an even divisor, some user ports get extra buffers */
dd->ipath_ports_extrabuf = dd->ipath_lastport_piobuf -
(dd->ipath_pbufsport * uports);
if (dd->ipath_ports_extrabuf)
ipath_dbg("%u pbufs/port leaves some unused, add 1 buffer to "
"ports <= %u\n", dd->ipath_pbufsport,
dd->ipath_ports_extrabuf);
dd->ipath_lastpioindex = 0;
dd->ipath_lastpioindexl = dd->ipath_piobcnt2k;
ipath_chg_pioavailkernel(dd, 0, piobufs, 1);
/* ipath_pioavailshadow initialized earlier */
ipath_cdbg(VERBOSE, "%d PIO bufs for kernel out of %d total %u "
"each for %u user ports\n", kpiobufs,
piobufs, dd->ipath_pbufsport, uports);
if (dd->ipath_pioupd_thresh) {
if (dd->ipath_pbufsport < dd->ipath_pioupd_thresh)
dd->ipath_pioupd_thresh = dd->ipath_pbufsport;
if (kpiobufs < dd->ipath_pioupd_thresh)
dd->ipath_pioupd_thresh = kpiobufs;
}
ret = dd->ipath_f_early_init(dd);
if (ret) {
ipath_dev_err(dd, "Early initialization failure\n");
goto done;
}
/*
* Cancel any possible active sends from early driver load.
* Follows early_init because some chips have to initialize
* PIO buffers in early_init to avoid false parity errors.
*/
ipath_cancel_sends(dd, 0);
/*
* Early_init sets rcvhdrentsize and rcvhdrsize, so this must be
* done after early_init.
......@@ -836,6 +817,7 @@ int ipath_init_chip(struct ipath_devdata *dd, int reinit)
ipath_write_kreg(dd, dd->ipath_kregs->kr_sendpioavailaddr,
dd->ipath_pioavailregs_phys);
/*
* this is to detect s/w errors, which the h/w works around by
* ignoring the low 6 bits of address, if it wasn't aligned.
......@@ -862,12 +844,6 @@ int ipath_init_chip(struct ipath_devdata *dd, int reinit)
~0ULL&~INFINIPATH_HWE_MEMBISTFAILED);
ipath_write_kreg(dd, dd->ipath_kregs->kr_control, 0ULL);
spin_lock_irqsave(&dd->ipath_sendctrl_lock, flags);
dd->ipath_sendctrl = INFINIPATH_S_PIOENABLE;
ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl, dd->ipath_sendctrl);
ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch);
spin_unlock_irqrestore(&dd->ipath_sendctrl_lock, flags);
/*
* before error clears, since we expect serdes pll errors during
* this, the first time after reset
......@@ -940,6 +916,19 @@ int ipath_init_chip(struct ipath_devdata *dd, int reinit)
else
enable_chip(dd, reinit);
/* after enable_chip, so pioavailshadow setup */
ipath_chg_pioavailkernel(dd, 0, piobufs, 1);
/*
* Cancel any possible active sends from early driver load.
* Follows early_init because some chips have to initialize
* PIO buffers in early_init to avoid false parity errors.
* After enable and ipath_chg_pioavailkernel so we can safely
* enable pioavail updates and PIOENABLE; packets are now
* ready to go out.
*/
ipath_cancel_sends(dd, 1);
if (!reinit) {
/*
* Used when we close a port, for DMA already in flight
......
......@@ -38,42 +38,12 @@
#include "ipath_verbs.h"
#include "ipath_common.h"
/*
* clear (write) a pio buffer, to clear a parity error. This routine
* should only be called when in freeze mode, and the buffer should be
* canceled afterwards.
*/
static void ipath_clrpiobuf(struct ipath_devdata *dd, u32 pnum)
{
u32 __iomem *pbuf;
u32 dwcnt; /* dword count to write */
if (pnum < dd->ipath_piobcnt2k) {
pbuf = (u32 __iomem *) (dd->ipath_pio2kbase + pnum *
dd->ipath_palign);
dwcnt = dd->ipath_piosize2k >> 2;
}
else {
pbuf = (u32 __iomem *) (dd->ipath_pio4kbase +
(pnum - dd->ipath_piobcnt2k) * dd->ipath_4kalign);
dwcnt = dd->ipath_piosize4k >> 2;
}
dev_info(&dd->pcidev->dev,
"Rewrite PIO buffer %u, to recover from parity error\n",
pnum);
/* no flush required, since already in freeze */
writel(dwcnt + 1, pbuf);
while (--dwcnt)
writel(0, pbuf++);
}
/*
* Called when we might have an error that is specific to a particular
* PIO buffer, and may need to cancel that buffer, so it can be re-used.
* If rewrite is true, and bits are set in the sendbufferror registers,
* we'll write to the buffer, for error recovery on parity errors.
*/
void ipath_disarm_senderrbufs(struct ipath_devdata *dd, int rewrite)
void ipath_disarm_senderrbufs(struct ipath_devdata *dd)
{
u32 piobcnt;
unsigned long sbuf[4];
......@@ -109,11 +79,8 @@ void ipath_disarm_senderrbufs(struct ipath_devdata *dd, int rewrite)
}
for (i = 0; i < piobcnt; i++)
if (test_bit(i, sbuf)) {
if (rewrite)
ipath_clrpiobuf(dd, i);
if (test_bit(i, sbuf))
ipath_disarm_piobufs(dd, i, 1);
}
/* ignore armlaunch errs for a bit */
dd->ipath_lastcancel = jiffies+3;
}
......@@ -164,7 +131,7 @@ static u64 handle_e_sum_errs(struct ipath_devdata *dd, ipath_err_t errs)
{
u64 ignore_this_time = 0;
ipath_disarm_senderrbufs(dd, 0);
ipath_disarm_senderrbufs(dd);
if ((errs & E_SUM_LINK_PKTERRS) &&
!(dd->ipath_flags & IPATH_LINKACTIVE)) {
/*
......@@ -909,8 +876,8 @@ static int handle_errors(struct ipath_devdata *dd, ipath_err_t errs)
* processes (causing armlaunch), send errors due to going into freeze mode,
* etc., and try to avoid causing extra interrupts while doing so.
* Forcibly update the in-memory pioavail register copies after cleanup
* because the chip won't do it for anything changing while in freeze mode
* (we don't want to wait for the next pio buffer state change).
* because the chip won't do it while in freeze mode (the register values
* themselves are kept correct).
* Make sure that we don't lose any important interrupts by using the chip
* feature that says that writing 0 to a bit in *clear that is set in
* *status will cause an interrupt to be generated again (if allowed by
......@@ -918,43 +885,22 @@ static int handle_errors(struct ipath_devdata *dd, ipath_err_t errs)
*/
void ipath_clear_freeze(struct ipath_devdata *dd)
{
int i, im;
u64 val;
/* disable error interrupts, to avoid confusion */
ipath_write_kreg(dd, dd->ipath_kregs->kr_errormask, 0ULL);
/* also disable interrupts; errormask is sometimes overwriten */
ipath_write_kreg(dd, dd->ipath_kregs->kr_intmask, 0ULL);
/*
* clear all sends, because they have may been
* completed by usercode while in freeze mode, and
* therefore would not be sent, and eventually
* might cause the process to run out of bufs
*/
ipath_cancel_sends(dd, 0);
ipath_cancel_sends(dd, 1);
/* clear the freeze, and be sure chip saw it */
ipath_write_kreg(dd, dd->ipath_kregs->kr_control,
dd->ipath_control);
ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch);
/* ensure pio avail updates continue */
/* force in-memory update now we are out of freeze */
ipath_force_pio_avail_update(dd);
/*
* We just enabled pioavailupdate, so dma copy is almost certainly
* not yet right, so read the registers directly. Similar to init
*/
for (i = 0; i < dd->ipath_pioavregs; i++) {
/* deal with 6110 chip bug */
im = (i > 3 && (dd->ipath_flags & IPATH_SWAP_PIOBUFS)) ?
i ^ 1 : i;
val = ipath_read_kreg64(dd, (0x1000 / sizeof(u64)) + im);
dd->ipath_pioavailregs_dma[i] = cpu_to_le64(val);
dd->ipath_pioavailshadow[i] = val |
(~dd->ipath_pioavailkernel[i] <<
INFINIPATH_SENDPIOAVAIL_BUSY_SHIFT);
}
/*
* force new interrupt if any hwerr, error or interrupt bits are
* still set, and clear "safe" send packet errors related to freeze
......@@ -1312,10 +1258,8 @@ irqreturn_t ipath_intr(int irq, void *data)
ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch);
spin_unlock_irqrestore(&dd->ipath_sendctrl_lock, flags);
if (!(dd->ipath_flags & IPATH_HAS_SEND_DMA))
handle_layer_pioavail(dd);
else
ipath_dbg("unexpected BUFAVAIL intr\n");
/* always process; sdma verbs uses PIO for acks and VL15 */
handle_layer_pioavail(dd);
}
ret = IRQ_HANDLED;
......
......@@ -117,6 +117,10 @@ struct ipath_portdata {
u16 port_subport_cnt;
/* non-zero if port is being shared. */
u16 port_subport_id;
/* number of pio bufs for this port (all procs, if shared) */
u32 port_piocnt;
/* first pio buffer for this port */
u32 port_pio_base;
/* chip offset of PIO buffers for this port */
u32 port_piobufs;
/* how many alloc_pages() chunks in port_rcvegrbuf_pages */
......@@ -384,6 +388,8 @@ struct ipath_devdata {
u32 ipath_lastrpkts;
/* pio bufs allocated per port */
u32 ipath_pbufsport;
/* if remainder on bufs/port, ports < extrabuf get 1 extra */
u32 ipath_ports_extrabuf;
u32 ipath_pioupd_thresh; /* update threshold, some chips */
/*
* number of ports configured as max; zero is set to number chip
......@@ -1011,7 +1017,7 @@ void ipath_get_eeprom_info(struct ipath_devdata *);
int ipath_update_eeprom_log(struct ipath_devdata *dd);
void ipath_inc_eeprom_err(struct ipath_devdata *dd, u32 eidx, u32 incr);
u64 ipath_snap_cntr(struct ipath_devdata *, ipath_creg);
void ipath_disarm_senderrbufs(struct ipath_devdata *, int);
void ipath_disarm_senderrbufs(struct ipath_devdata *);
void ipath_force_pio_avail_update(struct ipath_devdata *);
void signal_ib_event(struct ipath_devdata *dd, enum ib_event_type ev);
......
......@@ -1746,7 +1746,11 @@ void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
qp->r_wrid_valid = 0;
wc.wr_id = qp->r_wr_id;
wc.status = IB_WC_SUCCESS;
wc.opcode = IB_WC_RECV;
if (opcode == OP(RDMA_WRITE_LAST_WITH_IMMEDIATE) ||
opcode == OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE))
wc.opcode = IB_WC_RECV_RDMA_WITH_IMM;
else
wc.opcode = IB_WC_RECV;
wc.vendor_err = 0;
wc.qp = &qp->ibqp;
wc.src_qp = qp->remote_qpn;
......
......@@ -481,9 +481,10 @@ done:
wake_up(&qp->wait);
}
static void want_buffer(struct ipath_devdata *dd)
static void want_buffer(struct ipath_devdata *dd, struct ipath_qp *qp)
{
if (!(dd->ipath_flags & IPATH_HAS_SEND_DMA)) {
if (!(dd->ipath_flags & IPATH_HAS_SEND_DMA) ||
qp->ibqp.qp_type == IB_QPT_SMI) {
unsigned long flags;
spin_lock_irqsave(&dd->ipath_sendctrl_lock, flags);
......@@ -519,7 +520,7 @@ static void ipath_no_bufs_available(struct ipath_qp *qp,
spin_lock_irqsave(&dev->pending_lock, flags);
list_add_tail(&qp->piowait, &dev->piowait);
spin_unlock_irqrestore(&dev->pending_lock, flags);
want_buffer(dev->dd);
want_buffer(dev->dd, qp);
dev->n_piowait++;
}
......
......@@ -308,13 +308,15 @@ static void sdma_abort_task(unsigned long opaque)
spin_unlock_irqrestore(&dd->ipath_sdma_lock, flags);
/*
* Don't restart sdma here. Wait until link is up to ACTIVE.
* VL15 MADs used to bring the link up use PIO, and multiple
* link transitions otherwise cause the sdma engine to be
* Don't restart sdma here (with the exception
* below). Wait until link is up to ACTIVE. VL15 MADs
* used to bring the link up use PIO, and multiple link
* transitions otherwise cause the sdma engine to be
* stopped and started multiple times.
* The disable is done here, including the shadow, so the
* state is kept consistent.
* See ipath_restart_sdma() for the actual starting of sdma.
* The disable is done here, including the shadow,
* so the state is kept consistent.
* See ipath_restart_sdma() for the actual starting
* of sdma.
*/
spin_lock_irqsave(&dd->ipath_sendctrl_lock, flags);
dd->ipath_sendctrl &= ~INFINIPATH_S_SDMAENABLE;
......@@ -326,6 +328,13 @@ static void sdma_abort_task(unsigned long opaque)
/* make sure I see next message */
dd->ipath_sdma_abort_jiffies = 0;
/*
* Not everything that takes SDMA offline is a link
* status change. If the link was up, restart SDMA.
*/
if (dd->ipath_flags & IPATH_LINKACTIVE)
ipath_restart_sdma(dd);
goto done;
}
......@@ -427,7 +436,12 @@ int setup_sdma(struct ipath_devdata *dd)
goto done;
}
dd->ipath_sdma_status = 0;
/*
* Set initial status as if we had been up, then gone down.
* This lets initial start on transition to ACTIVE be the
* same as restart after link flap.
*/
dd->ipath_sdma_status = IPATH_SDMA_ABORT_ABORTED;
dd->ipath_sdma_abort_jiffies = 0;
dd->ipath_sdma_generation = 0;
dd->ipath_sdma_descq_tail = 0;
......@@ -449,16 +463,19 @@ int setup_sdma(struct ipath_devdata *dd)
ipath_write_kreg(dd, dd->ipath_kregs->kr_senddmaheadaddr,
dd->ipath_sdma_head_phys);
/* Reserve all the former "kernel" piobufs */
n = dd->ipath_piobcnt2k + dd->ipath_piobcnt4k - dd->ipath_pioreserved;
for (i = dd->ipath_lastport_piobuf; i < n; ++i) {
/*
* Reserve all the former "kernel" piobufs, using high number range
* so we get as many 4K buffers as possible
*/
n = dd->ipath_piobcnt2k + dd->ipath_piobcnt4k;
i = dd->ipath_lastport_piobuf + dd->ipath_pioreserved;
ipath_chg_pioavailkernel(dd, i, n - i , 0);
for (; i < n; ++i) {
unsigned word = i / 64;
unsigned bit = i & 63;
BUG_ON(word >= 3);
senddmabufmask[word] |= 1ULL << bit;
}
ipath_chg_pioavailkernel(dd, dd->ipath_lastport_piobuf,
n - dd->ipath_lastport_piobuf, 0);
ipath_write_kreg(dd, dd->ipath_kregs->kr_senddmabufmask0,
senddmabufmask[0]);
ipath_write_kreg(dd, dd->ipath_kregs->kr_senddmabufmask1,
......@@ -615,6 +632,9 @@ void ipath_restart_sdma(struct ipath_devdata *dd)
ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch);
spin_unlock_irqrestore(&dd->ipath_sendctrl_lock, flags);
/* notify upper layers */
ipath_ib_piobufavail(dd->verbs_dev);
bail:
return;
}
......
......@@ -396,7 +396,6 @@ static int ipath_post_one_send(struct ipath_qp *qp, struct ib_send_wr *wr)
wqe = get_swqe_ptr(qp, qp->s_head);
wqe->wr = *wr;
wqe->ssn = qp->s_ssn++;
wqe->length = 0;
if (wr->num_sge) {
acc = wr->opcode >= IB_WR_RDMA_READ ?
......@@ -422,6 +421,7 @@ static int ipath_post_one_send(struct ipath_qp *qp, struct ib_send_wr *wr)
goto bail_inval;
} else if (wqe->length > to_idev(qp->ibqp.device)->dd->ipath_ibmtu)
goto bail_inval;
wqe->ssn = qp->s_ssn++;
qp->s_head = next;
ret = 0;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment