Commit 8aee74c8 authored by Linus Torvalds's avatar Linus Torvalds

Merge branch 'for-linus' of master.kernel.org:/pub/scm/linux/kernel/git/roland/infiniband

* 'for-linus' of master.kernel.org:/pub/scm/linux/kernel/git/roland/infiniband:
  IB/cm: Improve local id allocation
  IPoIB/cm: Fix SRQ WR leak
  IB/ipoib: Fix typos in error messages
  IB/mlx4: Check if SRQ is full when posting receive
  IB/mlx4: Pass send queue sizes from userspace to kernel
  IB/mlx4: Fix check of opcode in mlx4_ib_post_send()
  mlx4_core: Fix array overrun in dump_dev_cap_flags()
  IB/mlx4: Fix RESET to RESET and RESET to ERROR transitions
  IB/mthca: Fix RESET to ERROR transition
  IB/mlx4: Set GRH:HopLimit when sending globally routed MADs
  IB/mthca: Set GRH:HopLimit when building MLX headers
  IB/mlx4: Fix check of max_qp_dest_rdma in modify QP
  IB/mthca: Fix use-after-free on device restart
  IB/ehca: Return proper error code if register_mr fails
  IPoIB: Handle P_Key table reordering
  IB/core: Use start_port() and end_port()
  IB/core: Add helpers for uncached GID and P_Key searches
  IB/ipath: Fix potential deadlock with multicast spinlocks
  IB/core: Free umem when mm is already gone
parents 080e8927 9f81036c
......@@ -306,7 +306,9 @@ static int cm_alloc_id(struct cm_id_private *cm_id_priv)
do {
spin_lock_irqsave(&cm.lock, flags);
ret = idr_get_new_above(&cm.local_id_table, cm_id_priv,
next_id++, &id);
next_id, &id);
if (!ret)
next_id = ((unsigned) id + 1) & MAX_ID_MASK;
spin_unlock_irqrestore(&cm.lock, flags);
} while( (ret == -EAGAIN) && idr_pre_get(&cm.local_id_table, GFP_KERNEL) );
......
......@@ -150,6 +150,18 @@ static int alloc_name(char *name)
return 0;
}
static int start_port(struct ib_device *device)
{
return (device->node_type == RDMA_NODE_IB_SWITCH) ? 0 : 1;
}
static int end_port(struct ib_device *device)
{
return (device->node_type == RDMA_NODE_IB_SWITCH) ?
0 : device->phys_port_cnt;
}
/**
* ib_alloc_device - allocate an IB device struct
* @size:size of structure to allocate
......@@ -209,6 +221,45 @@ static int add_client_context(struct ib_device *device, struct ib_client *client
return 0;
}
static int read_port_table_lengths(struct ib_device *device)
{
struct ib_port_attr *tprops = NULL;
int num_ports, ret = -ENOMEM;
u8 port_index;
tprops = kmalloc(sizeof *tprops, GFP_KERNEL);
if (!tprops)
goto out;
num_ports = end_port(device) - start_port(device) + 1;
device->pkey_tbl_len = kmalloc(sizeof *device->pkey_tbl_len * num_ports,
GFP_KERNEL);
device->gid_tbl_len = kmalloc(sizeof *device->gid_tbl_len * num_ports,
GFP_KERNEL);
if (!device->pkey_tbl_len || !device->gid_tbl_len)
goto err;
for (port_index = 0; port_index < num_ports; ++port_index) {
ret = ib_query_port(device, port_index + start_port(device),
tprops);
if (ret)
goto err;
device->pkey_tbl_len[port_index] = tprops->pkey_tbl_len;
device->gid_tbl_len[port_index] = tprops->gid_tbl_len;
}
ret = 0;
goto out;
err:
kfree(device->gid_tbl_len);
kfree(device->pkey_tbl_len);
out:
kfree(tprops);
return ret;
}
/**
* ib_register_device - Register an IB device with IB core
* @device:Device to register
......@@ -240,10 +291,19 @@ int ib_register_device(struct ib_device *device)
spin_lock_init(&device->event_handler_lock);
spin_lock_init(&device->client_data_lock);
ret = read_port_table_lengths(device);
if (ret) {
printk(KERN_WARNING "Couldn't create table lengths cache for device %s\n",
device->name);
goto out;
}
ret = ib_device_register_sysfs(device);
if (ret) {
printk(KERN_WARNING "Couldn't register device %s with driver model\n",
device->name);
kfree(device->gid_tbl_len);
kfree(device->pkey_tbl_len);
goto out;
}
......@@ -285,6 +345,9 @@ void ib_unregister_device(struct ib_device *device)
list_del(&device->core_list);
kfree(device->gid_tbl_len);
kfree(device->pkey_tbl_len);
mutex_unlock(&device_mutex);
spin_lock_irqsave(&device->client_data_lock, flags);
......@@ -507,10 +570,7 @@ int ib_query_port(struct ib_device *device,
u8 port_num,
struct ib_port_attr *port_attr)
{
if (device->node_type == RDMA_NODE_IB_SWITCH) {
if (port_num)
return -EINVAL;
} else if (port_num < 1 || port_num > device->phys_port_cnt)
if (port_num < start_port(device) || port_num > end_port(device))
return -EINVAL;
return device->query_port(device, port_num, port_attr);
......@@ -582,10 +642,7 @@ int ib_modify_port(struct ib_device *device,
u8 port_num, int port_modify_mask,
struct ib_port_modify *port_modify)
{
if (device->node_type == RDMA_NODE_IB_SWITCH) {
if (port_num)
return -EINVAL;
} else if (port_num < 1 || port_num > device->phys_port_cnt)
if (port_num < start_port(device) || port_num > end_port(device))
return -EINVAL;
return device->modify_port(device, port_num, port_modify_mask,
......@@ -593,6 +650,68 @@ int ib_modify_port(struct ib_device *device,
}
EXPORT_SYMBOL(ib_modify_port);
/**
* ib_find_gid - Returns the port number and GID table index where
* a specified GID value occurs.
* @device: The device to query.
* @gid: The GID value to search for.
* @port_num: The port number of the device where the GID value was found.
* @index: The index into the GID table where the GID was found. This
* parameter may be NULL.
*/
int ib_find_gid(struct ib_device *device, union ib_gid *gid,
u8 *port_num, u16 *index)
{
union ib_gid tmp_gid;
int ret, port, i;
for (port = start_port(device); port <= end_port(device); ++port) {
for (i = 0; i < device->gid_tbl_len[port - start_port(device)]; ++i) {
ret = ib_query_gid(device, port, i, &tmp_gid);
if (ret)
return ret;
if (!memcmp(&tmp_gid, gid, sizeof *gid)) {
*port_num = port;
if (index)
*index = i;
return 0;
}
}
}
return -ENOENT;
}
EXPORT_SYMBOL(ib_find_gid);
/**
* ib_find_pkey - Returns the PKey table index where a specified
* PKey value occurs.
* @device: The device to query.
* @port_num: The port number of the device to search for the PKey.
* @pkey: The PKey value to search for.
* @index: The index into the PKey table where the PKey was found.
*/
int ib_find_pkey(struct ib_device *device,
u8 port_num, u16 pkey, u16 *index)
{
int ret, i;
u16 tmp_pkey;
for (i = 0; i < device->pkey_tbl_len[port_num - start_port(device)]; ++i) {
ret = ib_query_pkey(device, port_num, i, &tmp_pkey);
if (ret)
return ret;
if (pkey == tmp_pkey) {
*index = i;
return 0;
}
}
return -ENOENT;
}
EXPORT_SYMBOL(ib_find_pkey);
static int __init ib_core_init(void)
{
int ret;
......
......@@ -210,8 +210,10 @@ void ib_umem_release(struct ib_umem *umem)
__ib_umem_release(umem->context->device, umem, 1);
mm = get_task_mm(current);
if (!mm)
if (!mm) {
kfree(umem);
return;
}
diff = PAGE_ALIGN(umem->length + umem->offset) >> PAGE_SHIFT;
......
......@@ -2050,13 +2050,10 @@ int ehca_mrmw_map_hrc_alloc(const u64 hipz_rc)
switch (hipz_rc) {
case H_SUCCESS: /* successful completion */
return 0;
case H_ADAPTER_PARM: /* invalid adapter handle */
case H_RT_PARM: /* invalid resource type */
case H_NOT_ENOUGH_RESOURCES: /* insufficient resources */
case H_MLENGTH_PARM: /* invalid memory length */
case H_MEM_ACCESS_PARM: /* invalid access controls */
case H_CONSTRAINED: /* resource constraint */
return -EINVAL;
case H_NO_MEM:
return -ENOMEM;
case H_BUSY: /* long busy */
return -EBUSY;
default:
......
......@@ -165,10 +165,9 @@ static int ipath_mcast_add(struct ipath_ibdev *dev,
{
struct rb_node **n = &mcast_tree.rb_node;
struct rb_node *pn = NULL;
unsigned long flags;
int ret;
spin_lock_irqsave(&mcast_lock, flags);
spin_lock_irq(&mcast_lock);
while (*n) {
struct ipath_mcast *tmcast;
......@@ -228,7 +227,7 @@ static int ipath_mcast_add(struct ipath_ibdev *dev,
ret = 0;
bail:
spin_unlock_irqrestore(&mcast_lock, flags);
spin_unlock_irq(&mcast_lock);
return ret;
}
......@@ -289,17 +288,16 @@ int ipath_multicast_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
struct ipath_mcast *mcast = NULL;
struct ipath_mcast_qp *p, *tmp;
struct rb_node *n;
unsigned long flags;
int last = 0;
int ret;
spin_lock_irqsave(&mcast_lock, flags);
spin_lock_irq(&mcast_lock);
/* Find the GID in the mcast table. */
n = mcast_tree.rb_node;
while (1) {
if (n == NULL) {
spin_unlock_irqrestore(&mcast_lock, flags);
spin_unlock_irq(&mcast_lock);
ret = -EINVAL;
goto bail;
}
......@@ -334,7 +332,7 @@ int ipath_multicast_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
break;
}
spin_unlock_irqrestore(&mcast_lock, flags);
spin_unlock_irq(&mcast_lock);
if (p) {
/*
......@@ -348,9 +346,9 @@ int ipath_multicast_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
atomic_dec(&mcast->refcount);
wait_event(mcast->wait, !atomic_read(&mcast->refcount));
ipath_mcast_free(mcast);
spin_lock(&dev->n_mcast_grps_lock);
spin_lock_irq(&dev->n_mcast_grps_lock);
dev->n_mcast_grps_allocated--;
spin_unlock(&dev->n_mcast_grps_lock);
spin_unlock_irq(&dev->n_mcast_grps_lock);
}
ret = 0;
......
......@@ -188,14 +188,32 @@ static int send_wqe_overhead(enum ib_qp_type type)
}
}
static int set_qp_size(struct mlx4_ib_dev *dev, struct ib_qp_cap *cap,
enum ib_qp_type type, struct mlx4_ib_qp *qp)
static int set_rq_size(struct mlx4_ib_dev *dev, struct ib_qp_cap *cap,
struct mlx4_ib_qp *qp)
{
/* Sanity check QP size before proceeding */
/* Sanity check RQ size before proceeding */
if (cap->max_recv_wr > dev->dev->caps.max_wqes ||
cap->max_recv_sge > dev->dev->caps.max_rq_sg)
return -EINVAL;
qp->rq.max = cap->max_recv_wr ? roundup_pow_of_two(cap->max_recv_wr) : 0;
qp->rq.wqe_shift = ilog2(roundup_pow_of_two(cap->max_recv_sge *
sizeof (struct mlx4_wqe_data_seg)));
qp->rq.max_gs = (1 << qp->rq.wqe_shift) / sizeof (struct mlx4_wqe_data_seg);
cap->max_recv_wr = qp->rq.max;
cap->max_recv_sge = qp->rq.max_gs;
return 0;
}
static int set_kernel_sq_size(struct mlx4_ib_dev *dev, struct ib_qp_cap *cap,
enum ib_qp_type type, struct mlx4_ib_qp *qp)
{
/* Sanity check SQ size before proceeding */
if (cap->max_send_wr > dev->dev->caps.max_wqes ||
cap->max_recv_wr > dev->dev->caps.max_wqes ||
cap->max_send_sge > dev->dev->caps.max_sq_sg ||
cap->max_recv_sge > dev->dev->caps.max_rq_sg ||
cap->max_inline_data + send_wqe_overhead(type) +
sizeof (struct mlx4_wqe_inline_seg) > dev->dev->caps.max_sq_desc_sz)
return -EINVAL;
......@@ -208,12 +226,7 @@ static int set_qp_size(struct mlx4_ib_dev *dev, struct ib_qp_cap *cap,
cap->max_send_sge + 2 > dev->dev->caps.max_sq_sg)
return -EINVAL;
qp->rq.max = cap->max_recv_wr ? roundup_pow_of_two(cap->max_recv_wr) : 0;
qp->sq.max = cap->max_send_wr ? roundup_pow_of_two(cap->max_send_wr) : 0;
qp->rq.wqe_shift = ilog2(roundup_pow_of_two(cap->max_recv_sge *
sizeof (struct mlx4_wqe_data_seg)));
qp->rq.max_gs = (1 << qp->rq.wqe_shift) / sizeof (struct mlx4_wqe_data_seg);
qp->sq.max = cap->max_send_wr ? roundup_pow_of_two(cap->max_send_wr) : 1;
qp->sq.wqe_shift = ilog2(roundup_pow_of_two(max(cap->max_send_sge *
sizeof (struct mlx4_wqe_data_seg),
......@@ -233,16 +246,26 @@ static int set_qp_size(struct mlx4_ib_dev *dev, struct ib_qp_cap *cap,
qp->sq.offset = 0;
}
cap->max_send_wr = qp->sq.max;
cap->max_recv_wr = qp->rq.max;
cap->max_send_sge = qp->sq.max_gs;
cap->max_recv_sge = qp->rq.max_gs;
cap->max_send_wr = qp->sq.max;
cap->max_send_sge = qp->sq.max_gs;
cap->max_inline_data = (1 << qp->sq.wqe_shift) - send_wqe_overhead(type) -
sizeof (struct mlx4_wqe_inline_seg);
return 0;
}
static int set_user_sq_size(struct mlx4_ib_qp *qp,
struct mlx4_ib_create_qp *ucmd)
{
qp->sq.max = 1 << ucmd->log_sq_bb_count;
qp->sq.wqe_shift = ucmd->log_sq_stride;
qp->buf_size = (qp->rq.max << qp->rq.wqe_shift) +
(qp->sq.max << qp->sq.wqe_shift);
return 0;
}
static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd,
struct ib_qp_init_attr *init_attr,
struct ib_udata *udata, int sqpn, struct mlx4_ib_qp *qp)
......@@ -264,7 +287,7 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd,
qp->sq.head = 0;
qp->sq.tail = 0;
err = set_qp_size(dev, &init_attr->cap, init_attr->qp_type, qp);
err = set_rq_size(dev, &init_attr->cap, qp);
if (err)
goto err;
......@@ -276,6 +299,10 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd,
goto err;
}
err = set_user_sq_size(qp, &ucmd);
if (err)
goto err;
qp->umem = ib_umem_get(pd->uobject->context, ucmd.buf_addr,
qp->buf_size, 0);
if (IS_ERR(qp->umem)) {
......@@ -297,6 +324,10 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd,
if (err)
goto err_mtt;
} else {
err = set_kernel_sq_size(dev, &init_attr->cap, init_attr->qp_type, qp);
if (err)
goto err;
err = mlx4_ib_db_alloc(dev, &qp->db, 0);
if (err)
goto err;
......@@ -573,7 +604,7 @@ static int to_mlx4_st(enum ib_qp_type type)
}
}
static __be32 to_mlx4_access_flags(struct mlx4_ib_qp *qp, struct ib_qp_attr *attr,
static __be32 to_mlx4_access_flags(struct mlx4_ib_qp *qp, const struct ib_qp_attr *attr,
int attr_mask)
{
u8 dest_rd_atomic;
......@@ -603,7 +634,7 @@ static __be32 to_mlx4_access_flags(struct mlx4_ib_qp *qp, struct ib_qp_attr *att
return cpu_to_be32(hw_access_flags);
}
static void store_sqp_attrs(struct mlx4_ib_sqp *sqp, struct ib_qp_attr *attr,
static void store_sqp_attrs(struct mlx4_ib_sqp *sqp, const struct ib_qp_attr *attr,
int attr_mask)
{
if (attr_mask & IB_QP_PKEY_INDEX)
......@@ -619,7 +650,7 @@ static void mlx4_set_sched(struct mlx4_qp_path *path, u8 port)
path->sched_queue = (path->sched_queue & 0xbf) | ((port - 1) << 6);
}
static int mlx4_set_path(struct mlx4_ib_dev *dev, struct ib_ah_attr *ah,
static int mlx4_set_path(struct mlx4_ib_dev *dev, const struct ib_ah_attr *ah,
struct mlx4_qp_path *path, u8 port)
{
path->grh_mylmc = ah->src_path_bits & 0x7f;
......@@ -655,14 +686,14 @@ static int mlx4_set_path(struct mlx4_ib_dev *dev, struct ib_ah_attr *ah,
return 0;
}
int mlx4_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
int attr_mask, struct ib_udata *udata)
static int __mlx4_ib_modify_qp(struct ib_qp *ibqp,
const struct ib_qp_attr *attr, int attr_mask,
enum ib_qp_state cur_state, enum ib_qp_state new_state)
{
struct mlx4_ib_dev *dev = to_mdev(ibqp->device);
struct mlx4_ib_qp *qp = to_mqp(ibqp);
struct mlx4_qp_context *context;
enum mlx4_qp_optpar optpar = 0;
enum ib_qp_state cur_state, new_state;
int sqd_event;
int err = -EINVAL;
......@@ -670,34 +701,6 @@ int mlx4_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
if (!context)
return -ENOMEM;
mutex_lock(&qp->mutex);
cur_state = attr_mask & IB_QP_CUR_STATE ? attr->cur_qp_state : qp->state;
new_state = attr_mask & IB_QP_STATE ? attr->qp_state : cur_state;
if (!ib_modify_qp_is_ok(cur_state, new_state, ibqp->qp_type, attr_mask))
goto out;
if ((attr_mask & IB_QP_PKEY_INDEX) &&
attr->pkey_index >= dev->dev->caps.pkey_table_len) {
goto out;
}
if ((attr_mask & IB_QP_PORT) &&
(attr->port_num == 0 || attr->port_num > dev->dev->caps.num_ports)) {
goto out;
}
if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC &&
attr->max_rd_atomic > dev->dev->caps.max_qp_init_rdma) {
goto out;
}
if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC &&
attr->max_dest_rd_atomic > 1 << dev->dev->caps.max_qp_dest_rdma) {
goto out;
}
context->flags = cpu_to_be32((to_mlx4_state(new_state) << 28) |
(to_mlx4_st(ibqp->qp_type) << 16));
context->flags |= cpu_to_be32(1 << 8); /* DE? */
......@@ -920,11 +923,84 @@ int mlx4_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
}
out:
mutex_unlock(&qp->mutex);
kfree(context);
return err;
}
static const struct ib_qp_attr mlx4_ib_qp_attr = { .port_num = 1 };
static const int mlx4_ib_qp_attr_mask_table[IB_QPT_UD + 1] = {
[IB_QPT_UD] = (IB_QP_PKEY_INDEX |
IB_QP_PORT |
IB_QP_QKEY),
[IB_QPT_UC] = (IB_QP_PKEY_INDEX |
IB_QP_PORT |
IB_QP_ACCESS_FLAGS),
[IB_QPT_RC] = (IB_QP_PKEY_INDEX |
IB_QP_PORT |
IB_QP_ACCESS_FLAGS),
[IB_QPT_SMI] = (IB_QP_PKEY_INDEX |
IB_QP_QKEY),
[IB_QPT_GSI] = (IB_QP_PKEY_INDEX |
IB_QP_QKEY),
};
int mlx4_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
int attr_mask, struct ib_udata *udata)
{
struct mlx4_ib_dev *dev = to_mdev(ibqp->device);
struct mlx4_ib_qp *qp = to_mqp(ibqp);
enum ib_qp_state cur_state, new_state;
int err = -EINVAL;
mutex_lock(&qp->mutex);
cur_state = attr_mask & IB_QP_CUR_STATE ? attr->cur_qp_state : qp->state;
new_state = attr_mask & IB_QP_STATE ? attr->qp_state : cur_state;
if (!ib_modify_qp_is_ok(cur_state, new_state, ibqp->qp_type, attr_mask))
goto out;
if ((attr_mask & IB_QP_PKEY_INDEX) &&
attr->pkey_index >= dev->dev->caps.pkey_table_len) {
goto out;
}
if ((attr_mask & IB_QP_PORT) &&
(attr->port_num == 0 || attr->port_num > dev->dev->caps.num_ports)) {
goto out;
}
if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC &&
attr->max_rd_atomic > dev->dev->caps.max_qp_init_rdma) {
goto out;
}
if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC &&
attr->max_dest_rd_atomic > dev->dev->caps.max_qp_dest_rdma) {
goto out;
}
if (cur_state == new_state && cur_state == IB_QPS_RESET) {
err = 0;
goto out;
}
if (cur_state == IB_QPS_RESET && new_state == IB_QPS_ERR) {
err = __mlx4_ib_modify_qp(ibqp, &mlx4_ib_qp_attr,
mlx4_ib_qp_attr_mask_table[ibqp->qp_type],
IB_QPS_RESET, IB_QPS_INIT);
if (err)
goto out;
cur_state = IB_QPS_INIT;
}
err = __mlx4_ib_modify_qp(ibqp, attr, attr_mask, cur_state, new_state);
out:
mutex_unlock(&qp->mutex);
return err;
}
static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_send_wr *wr,
void *wqe)
{
......@@ -952,6 +1028,7 @@ static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_send_wr *wr,
(be32_to_cpu(ah->av.sl_tclass_flowlabel) >> 20) & 0xff;
sqp->ud_header.grh.flow_label =
ah->av.sl_tclass_flowlabel & cpu_to_be32(0xfffff);
sqp->ud_header.grh.hop_limit = ah->av.hop_limit;
ib_get_cached_gid(ib_dev, be32_to_cpu(ah->av.port_pd) >> 24,
ah->av.gid_index, &sqp->ud_header.grh.source_gid);
memcpy(sqp->ud_header.grh.destination_gid.raw,
......@@ -1192,7 +1269,7 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
*/
wmb();
if (wr->opcode < 0 || wr->opcode > ARRAY_SIZE(mlx4_ib_opcode)) {
if (wr->opcode < 0 || wr->opcode >= ARRAY_SIZE(mlx4_ib_opcode)) {
err = -EINVAL;
goto out;
}
......
......@@ -297,6 +297,12 @@ int mlx4_ib_post_srq_recv(struct ib_srq *ibsrq, struct ib_recv_wr *wr,
break;
}
if (unlikely(srq->head == srq->tail)) {
err = -ENOMEM;
*bad_wr = wr;
break;
}
srq->wrid[srq->head] = wr->wr_id;
next = get_wqe(srq, srq->head);
......
......@@ -39,7 +39,7 @@
* Increment this value if any changes that break userspace ABI
* compatibility are made.
*/
#define MLX4_IB_UVERBS_ABI_VERSION 1
#define MLX4_IB_UVERBS_ABI_VERSION 2
/*
* Make sure that all structs defined in this file remain laid out so
......@@ -87,6 +87,9 @@ struct mlx4_ib_create_srq_resp {
struct mlx4_ib_create_qp {
__u64 buf_addr;
__u64 db_addr;
__u8 log_sq_bb_count;
__u8 log_sq_stride;
__u8 reserved[6];
};
#endif /* MLX4_IB_USER_H */
......@@ -279,6 +279,7 @@ int mthca_read_ah(struct mthca_dev *dev, struct mthca_ah *ah,
(be32_to_cpu(ah->av->sl_tclass_flowlabel) >> 20) & 0xff;
header->grh.flow_label =
ah->av->sl_tclass_flowlabel & cpu_to_be32(0xfffff);
header->grh.hop_limit = ah->av->hop_limit;
ib_get_cached_gid(&dev->ib_dev,
be32_to_cpu(ah->av->port_pd) >> 24,
ah->av->gid_index % dev->limits.gid_table_len,
......
......@@ -1250,12 +1250,14 @@ static void __mthca_remove_one(struct pci_dev *pdev)
int __mthca_restart_one(struct pci_dev *pdev)
{
struct mthca_dev *mdev;
int hca_type;
mdev = pci_get_drvdata(pdev);
if (!mdev)
return -ENODEV;
hca_type = mdev->hca_type;
__mthca_remove_one(pdev);
return __mthca_init_one(pdev, mdev->hca_type);
return __mthca_init_one(pdev, hca_type);
}
static int __devinit mthca_init_one(struct pci_dev *pdev,
......
......@@ -296,7 +296,7 @@ static int to_mthca_st(int transport)
}
}
static void store_attrs(struct mthca_sqp *sqp, struct ib_qp_attr *attr,
static void store_attrs(struct mthca_sqp *sqp, const struct ib_qp_attr *attr,
int attr_mask)
{
if (attr_mask & IB_QP_PKEY_INDEX)
......@@ -328,7 +328,7 @@ static void init_port(struct mthca_dev *dev, int port)
mthca_warn(dev, "INIT_IB returned status %02x.\n", status);
}
static __be32 get_hw_access_flags(struct mthca_qp *qp, struct ib_qp_attr *attr,
static __be32 get_hw_access_flags(struct mthca_qp *qp, const struct ib_qp_attr *attr,
int attr_mask)
{
u8 dest_rd_atomic;
......@@ -511,7 +511,7 @@ out:
return err;
}
static int mthca_path_set(struct mthca_dev *dev, struct ib_ah_attr *ah,
static int mthca_path_set(struct mthca_dev *dev, const struct ib_ah_attr *ah,
struct mthca_qp_path *path, u8 port)
{
path->g_mylmc = ah->src_path_bits & 0x7f;
......@@ -539,12 +539,12 @@ static int mthca_path_set(struct mthca_dev *dev, struct ib_ah_attr *ah,
return 0;
}
int mthca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask,
struct ib_udata *udata)
static int __mthca_modify_qp(struct ib_qp *ibqp,
const struct ib_qp_attr *attr, int attr_mask,
enum ib_qp_state cur_state, enum ib_qp_state new_state)
{
struct mthca_dev *dev = to_mdev(ibqp->device);
struct mthca_qp *qp = to_mqp(ibqp);
enum ib_qp_state cur_state, new_state;
struct mthca_mailbox *mailbox;
struct mthca_qp_param *qp_param;
struct mthca_qp_context *qp_context;
......@@ -552,60 +552,6 @@ int mthca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask,
u8 status;
int err = -EINVAL;
mutex_lock(&qp->mutex);
if (attr_mask & IB_QP_CUR_STATE) {
cur_state = attr->cur_qp_state;
} else {
spin_lock_irq(&qp->sq.lock);
spin_lock(&qp->rq.lock);
cur_state = qp->state;
spin_unlock(&qp->rq.lock);
spin_unlock_irq(&qp->sq.lock);
}
new_state = attr_mask & IB_QP_STATE ? attr->qp_state : cur_state;
if (!ib_modify_qp_is_ok(cur_state, new_state, ibqp->qp_type, attr_mask)) {
mthca_dbg(dev, "Bad QP transition (transport %d) "
"%d->%d with attr 0x%08x\n",
qp->transport, cur_state, new_state,
attr_mask);
goto out;
}
if (cur_state == new_state && cur_state == IB_QPS_RESET) {
err = 0;
goto out;
}
if ((attr_mask & IB_QP_PKEY_INDEX) &&
attr->pkey_index >= dev->limits.pkey_table_len) {
mthca_dbg(dev, "P_Key index (%u) too large. max is %d\n",
attr->pkey_index, dev->limits.pkey_table_len-1);
goto out;
}
if ((attr_mask & IB_QP_PORT) &&
(attr->port_num == 0 || attr->port_num > dev->limits.num_ports)) {
mthca_dbg(dev, "Port number (%u) is invalid\n", attr->port_num);
goto out;
}
if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC &&
attr->max_rd_atomic > dev->limits.max_qp_init_rdma) {
mthca_dbg(dev, "Max rdma_atomic as initiator %u too large (max is %d)\n",
attr->max_rd_atomic, dev->limits.max_qp_init_rdma);
goto out;
}
if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC &&
attr->max_dest_rd_atomic > 1 << dev->qp_table.rdb_shift) {
mthca_dbg(dev, "Max rdma_atomic as responder %u too large (max %d)\n",
attr->max_dest_rd_atomic, 1 << dev->qp_table.rdb_shift);
goto out;
}
mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL);
if (IS_ERR(mailbox)) {
err = PTR_ERR(mailbox);
......@@ -892,6 +838,98 @@ int mthca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask,
out_mailbox:
mthca_free_mailbox(dev, mailbox);
out:
return err;
}
static const struct ib_qp_attr dummy_init_attr = { .port_num = 1 };
static const int dummy_init_attr_mask[] = {
[IB_QPT_UD] = (IB_QP_PKEY_INDEX |
IB_QP_PORT |
IB_QP_QKEY),
[IB_QPT_UC] = (IB_QP_PKEY_INDEX |
IB_QP_PORT |
IB_QP_ACCESS_FLAGS),
[IB_QPT_RC] = (IB_QP_PKEY_INDEX |
IB_QP_PORT |
IB_QP_ACCESS_FLAGS),
[IB_QPT_SMI] = (IB_QP_PKEY_INDEX |
IB_QP_QKEY),
[IB_QPT_GSI] = (IB_QP_PKEY_INDEX |
IB_QP_QKEY),
};
int mthca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask,
struct ib_udata *udata)
{
struct mthca_dev *dev = to_mdev(ibqp->device);
struct mthca_qp *qp = to_mqp(ibqp);
enum ib_qp_state cur_state, new_state;
int err = -EINVAL;
mutex_lock(&qp->mutex);
if (attr_mask & IB_QP_CUR_STATE) {
cur_state = attr->cur_qp_state;
} else {
spin_lock_irq(&qp->sq.lock);
spin_lock(&qp->rq.lock);
cur_state = qp->state;
spin_unlock(&qp->rq.lock);
spin_unlock_irq(&qp->sq.lock);
}
new_state = attr_mask & IB_QP_STATE ? attr->qp_state : cur_state;
if (!ib_modify_qp_is_ok(cur_state, new_state, ibqp->qp_type, attr_mask)) {
mthca_dbg(dev, "Bad QP transition (transport %d) "
"%d->%d with attr 0x%08x\n",
qp->transport, cur_state, new_state,
attr_mask);
goto out;
}
if ((attr_mask & IB_QP_PKEY_INDEX) &&
attr->pkey_index >= dev->limits.pkey_table_len) {
mthca_dbg(dev, "P_Key index (%u) too large. max is %d\n",
attr->pkey_index, dev->limits.pkey_table_len-1);
goto out;
}
if ((attr_mask & IB_QP_PORT) &&
(attr->port_num == 0 || attr->port_num > dev->limits.num_ports)) {
mthca_dbg(dev, "Port number (%u) is invalid\n", attr->port_num);
goto out;
}
if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC &&
attr->max_rd_atomic > dev->limits.max_qp_init_rdma) {
mthca_dbg(dev, "Max rdma_atomic as initiator %u too large (max is %d)\n",
attr->max_rd_atomic, dev->limits.max_qp_init_rdma);
goto out;
}
if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC &&
attr->max_dest_rd_atomic > 1 << dev->qp_table.rdb_shift) {
mthca_dbg(dev, "Max rdma_atomic as responder %u too large (max %d)\n",
attr->max_dest_rd_atomic, 1 << dev->qp_table.rdb_shift);
goto out;
}
if (cur_state == new_state && cur_state == IB_QPS_RESET) {
err = 0;
goto out;
}
if (cur_state == IB_QPS_RESET && new_state == IB_QPS_ERR) {
err = __mthca_modify_qp(ibqp, &dummy_init_attr,
dummy_init_attr_mask[ibqp->qp_type],
IB_QPS_RESET, IB_QPS_INIT);
if (err)
goto out;
cur_state = IB_QPS_INIT;
}
err = __mthca_modify_qp(ibqp, attr, attr_mask, cur_state, new_state);
out:
mutex_unlock(&qp->mutex);
......
......@@ -132,12 +132,46 @@ struct ipoib_cm_data {
__be32 mtu;
};
/*
* Quoting 10.3.1 Queue Pair and EE Context States:
*
* Note, for QPs that are associated with an SRQ, the Consumer should take the
* QP through the Error State before invoking a Destroy QP or a Modify QP to the
* Reset State. The Consumer may invoke the Destroy QP without first performing
* a Modify QP to the Error State and waiting for the Affiliated Asynchronous
* Last WQE Reached Event. However, if the Consumer does not wait for the
* Affiliated Asynchronous Last WQE Reached Event, then WQE and Data Segment
* leakage may occur. Therefore, it is good programming practice to tear down a
* QP that is associated with an SRQ by using the following process:
*
* - Put the QP in the Error State
* - Wait for the Affiliated Asynchronous Last WQE Reached Event;
* - either:
* drain the CQ by invoking the Poll CQ verb and either wait for CQ
* to be empty or the number of Poll CQ operations has exceeded
* CQ capacity size;
* - or
* post another WR that completes on the same CQ and wait for this
* WR to return as a WC;
* - and then invoke a Destroy QP or Reset QP.
*
* We use the second option and wait for a completion on the
* rx_drain_qp before destroying QPs attached to our SRQ.
*/
enum ipoib_cm_state {
IPOIB_CM_RX_LIVE,
IPOIB_CM_RX_ERROR, /* Ignored by stale task */
IPOIB_CM_RX_FLUSH /* Last WQE Reached event observed */
};
struct ipoib_cm_rx {
struct ib_cm_id *id;
struct ib_qp *qp;
struct list_head list;
struct net_device *dev;
unsigned long jiffies;
enum ipoib_cm_state state;
};
struct ipoib_cm_tx {
......@@ -165,10 +199,16 @@ struct ipoib_cm_dev_priv {
struct ib_srq *srq;
struct ipoib_cm_rx_buf *srq_ring;
struct ib_cm_id *id;
struct list_head passive_ids;
struct ib_qp *rx_drain_qp; /* generates WR described in 10.3.1 */
struct list_head passive_ids; /* state: LIVE */
struct list_head rx_error_list; /* state: ERROR */
struct list_head rx_flush_list; /* state: FLUSH, drain not started */
struct list_head rx_drain_list; /* state: FLUSH, drain started */
struct list_head rx_reap_list; /* state: FLUSH, drain done */
struct work_struct start_task;
struct work_struct reap_task;
struct work_struct skb_task;
struct work_struct rx_reap_task;
struct delayed_work stale_task;
struct sk_buff_head skb_queue;
struct list_head start_list;
......@@ -201,15 +241,17 @@ struct ipoib_dev_priv {
struct list_head multicast_list;
struct rb_root multicast_tree;
struct delayed_work pkey_task;
struct delayed_work pkey_poll_task;
struct delayed_work mcast_task;
struct work_struct flush_task;
struct work_struct restart_task;
struct delayed_work ah_reap_task;
struct work_struct pkey_event_task;
struct ib_device *ca;
u8 port;
u16 pkey;
u16 pkey_index;
struct ib_pd *pd;
struct ib_mr *mr;
struct ib_cq *cq;
......@@ -333,12 +375,13 @@ struct ipoib_dev_priv *ipoib_intf_alloc(const char *format);
int ipoib_ib_dev_init(struct net_device *dev, struct ib_device *ca, int port);
void ipoib_ib_dev_flush(struct work_struct *work);
void ipoib_pkey_event(struct work_struct *work);
void ipoib_ib_dev_cleanup(struct net_device *dev);
int ipoib_ib_dev_open(struct net_device *dev);
int ipoib_ib_dev_up(struct net_device *dev);
int ipoib_ib_dev_down(struct net_device *dev, int flush);
int ipoib_ib_dev_stop(struct net_device *dev);
int ipoib_ib_dev_stop(struct net_device *dev, int flush);
int ipoib_dev_init(struct net_device *dev, struct ib_device *ca, int port);
void ipoib_dev_cleanup(struct net_device *dev);
......
......@@ -37,6 +37,7 @@
#include <net/dst.h>
#include <net/icmp.h>
#include <linux/icmpv6.h>
#include <linux/delay.h>
#ifdef CONFIG_INFINIBAND_IPOIB_DEBUG_DATA
static int data_debug_level;
......@@ -62,6 +63,16 @@ struct ipoib_cm_id {
u32 remote_mtu;
};
static struct ib_qp_attr ipoib_cm_err_attr = {
.qp_state = IB_QPS_ERR
};
#define IPOIB_CM_RX_DRAIN_WRID 0x7fffffff
static struct ib_recv_wr ipoib_cm_rx_drain_wr = {
.wr_id = IPOIB_CM_RX_DRAIN_WRID
};
static int ipoib_cm_tx_handler(struct ib_cm_id *cm_id,
struct ib_cm_event *event);
......@@ -150,11 +161,44 @@ partial_error:
return NULL;
}
static void ipoib_cm_start_rx_drain(struct ipoib_dev_priv* priv)
{
struct ib_recv_wr *bad_wr;
/* rx_drain_qp send queue depth is 1, so
* make sure we have at most 1 outstanding WR. */
if (list_empty(&priv->cm.rx_flush_list) ||
!list_empty(&priv->cm.rx_drain_list))
return;
if (ib_post_recv(priv->cm.rx_drain_qp, &ipoib_cm_rx_drain_wr, &bad_wr))
ipoib_warn(priv, "failed to post rx_drain wr\n");
list_splice_init(&priv->cm.rx_flush_list, &priv->cm.rx_drain_list);
}
static void ipoib_cm_rx_event_handler(struct ib_event *event, void *ctx)
{
struct ipoib_cm_rx *p = ctx;
struct ipoib_dev_priv *priv = netdev_priv(p->dev);
unsigned long flags;
if (event->event != IB_EVENT_QP_LAST_WQE_REACHED)
return;
spin_lock_irqsave(&priv->lock, flags);
list_move(&p->list, &priv->cm.rx_flush_list);
p->state = IPOIB_CM_RX_FLUSH;
ipoib_cm_start_rx_drain(priv);
spin_unlock_irqrestore(&priv->lock, flags);
}
static struct ib_qp *ipoib_cm_create_rx_qp(struct net_device *dev,
struct ipoib_cm_rx *p)
{
struct ipoib_dev_priv *priv = netdev_priv(dev);
struct ib_qp_init_attr attr = {
.event_handler = ipoib_cm_rx_event_handler,
.send_cq = priv->cq, /* does not matter, we never send anything */
.recv_cq = priv->cq,
.srq = priv->cm.srq,
......@@ -256,6 +300,7 @@ static int ipoib_cm_req_handler(struct ib_cm_id *cm_id, struct ib_cm_event *even
cm_id->context = p;
p->jiffies = jiffies;
p->state = IPOIB_CM_RX_LIVE;
spin_lock_irq(&priv->lock);
if (list_empty(&priv->cm.passive_ids))
queue_delayed_work(ipoib_workqueue,
......@@ -277,7 +322,6 @@ static int ipoib_cm_rx_handler(struct ib_cm_id *cm_id,
{
struct ipoib_cm_rx *p;
struct ipoib_dev_priv *priv;
int ret;
switch (event->event) {
case IB_CM_REQ_RECEIVED:
......@@ -289,20 +333,9 @@ static int ipoib_cm_rx_handler(struct ib_cm_id *cm_id,
case IB_CM_REJ_RECEIVED:
p = cm_id->context;
priv = netdev_priv(p->dev);
spin_lock_irq(&priv->lock);
if (list_empty(&p->list))
ret = 0; /* Connection is going away already. */
else {
list_del_init(&p->list);
ret = -ECONNRESET;
}
spin_unlock_irq(&priv->lock);
if (ret) {
ib_destroy_qp(p->qp);
kfree(p);
return ret;
}
return 0;
if (ib_modify_qp(p->qp, &ipoib_cm_err_attr, IB_QP_STATE))
ipoib_warn(priv, "unable to move qp to error state\n");
/* Fall through */
default:
return 0;
}
......@@ -354,8 +387,15 @@ void ipoib_cm_handle_rx_wc(struct net_device *dev, struct ib_wc *wc)
wr_id, wc->status);
if (unlikely(wr_id >= ipoib_recvq_size)) {
ipoib_warn(priv, "cm recv completion event with wrid %d (> %d)\n",
wr_id, ipoib_recvq_size);
if (wr_id == (IPOIB_CM_RX_DRAIN_WRID & ~IPOIB_CM_OP_SRQ)) {
spin_lock_irqsave(&priv->lock, flags);
list_splice_init(&priv->cm.rx_drain_list, &priv->cm.rx_reap_list);
ipoib_cm_start_rx_drain(priv);
queue_work(ipoib_workqueue, &priv->cm.rx_reap_task);
spin_unlock_irqrestore(&priv->lock, flags);
} else
ipoib_warn(priv, "cm recv completion event with wrid %d (> %d)\n",
wr_id, ipoib_recvq_size);
return;
}
......@@ -374,9 +414,9 @@ void ipoib_cm_handle_rx_wc(struct net_device *dev, struct ib_wc *wc)
if (p && time_after_eq(jiffies, p->jiffies + IPOIB_CM_RX_UPDATE_TIME)) {
spin_lock_irqsave(&priv->lock, flags);
p->jiffies = jiffies;
/* Move this entry to list head, but do
* not re-add it if it has been removed. */
if (!list_empty(&p->list))
/* Move this entry to list head, but do not re-add it
* if it has been moved out of list. */
if (p->state == IPOIB_CM_RX_LIVE)
list_move(&p->list, &priv->cm.passive_ids);
spin_unlock_irqrestore(&priv->lock, flags);
}
......@@ -583,17 +623,43 @@ static void ipoib_cm_tx_completion(struct ib_cq *cq, void *tx_ptr)
int ipoib_cm_dev_open(struct net_device *dev)
{
struct ipoib_dev_priv *priv = netdev_priv(dev);
struct ib_qp_init_attr qp_init_attr = {
.send_cq = priv->cq, /* does not matter, we never send anything */
.recv_cq = priv->cq,
.cap.max_send_wr = 1, /* FIXME: 0 Seems not to work */
.cap.max_send_sge = 1, /* FIXME: 0 Seems not to work */
.cap.max_recv_wr = 1,
.cap.max_recv_sge = 1, /* FIXME: 0 Seems not to work */
.sq_sig_type = IB_SIGNAL_ALL_WR,
.qp_type = IB_QPT_UC,
};
int ret;
if (!IPOIB_CM_SUPPORTED(dev->dev_addr))
return 0;
priv->cm.rx_drain_qp = ib_create_qp(priv->pd, &qp_init_attr);
if (IS_ERR(priv->cm.rx_drain_qp)) {
printk(KERN_WARNING "%s: failed to create CM ID\n", priv->ca->name);
ret = PTR_ERR(priv->cm.rx_drain_qp);
return ret;
}
/*
* We put the QP in error state directly. This way, a "flush
* error" WC will be immediately generated for each WR we post.
*/
ret = ib_modify_qp(priv->cm.rx_drain_qp, &ipoib_cm_err_attr, IB_QP_STATE);
if (ret) {
ipoib_warn(priv, "failed to modify drain QP to error: %d\n", ret);
goto err_qp;
}
priv->cm.id = ib_create_cm_id(priv->ca, ipoib_cm_rx_handler, dev);
if (IS_ERR(priv->cm.id)) {
printk(KERN_WARNING "%s: failed to create CM ID\n", priv->ca->name);
ret = PTR_ERR(priv->cm.id);
priv->cm.id = NULL;
return ret;
goto err_cm;
}
ret = ib_cm_listen(priv->cm.id, cpu_to_be64(IPOIB_CM_IETF_ID | priv->qp->qp_num),
......@@ -601,35 +667,79 @@ int ipoib_cm_dev_open(struct net_device *dev)
if (ret) {
printk(KERN_WARNING "%s: failed to listen on ID 0x%llx\n", priv->ca->name,
IPOIB_CM_IETF_ID | priv->qp->qp_num);
ib_destroy_cm_id(priv->cm.id);
priv->cm.id = NULL;
return ret;
goto err_listen;
}
return 0;
err_listen:
ib_destroy_cm_id(priv->cm.id);
err_cm:
priv->cm.id = NULL;
err_qp:
ib_destroy_qp(priv->cm.rx_drain_qp);
return ret;
}
void ipoib_cm_dev_stop(struct net_device *dev)
{
struct ipoib_dev_priv *priv = netdev_priv(dev);
struct ipoib_cm_rx *p;
struct ipoib_cm_rx *p, *n;
unsigned long begin;
LIST_HEAD(list);
int ret;
if (!IPOIB_CM_SUPPORTED(dev->dev_addr) || !priv->cm.id)
return;
ib_destroy_cm_id(priv->cm.id);
priv->cm.id = NULL;
spin_lock_irq(&priv->lock);
while (!list_empty(&priv->cm.passive_ids)) {
p = list_entry(priv->cm.passive_ids.next, typeof(*p), list);
list_del_init(&p->list);
list_move(&p->list, &priv->cm.rx_error_list);
p->state = IPOIB_CM_RX_ERROR;
spin_unlock_irq(&priv->lock);
ret = ib_modify_qp(p->qp, &ipoib_cm_err_attr, IB_QP_STATE);
if (ret)
ipoib_warn(priv, "unable to move qp to error state: %d\n", ret);
spin_lock_irq(&priv->lock);
}
/* Wait for all RX to be drained */
begin = jiffies;
while (!list_empty(&priv->cm.rx_error_list) ||
!list_empty(&priv->cm.rx_flush_list) ||
!list_empty(&priv->cm.rx_drain_list)) {
if (!time_after(jiffies, begin + 5 * HZ)) {
ipoib_warn(priv, "RX drain timing out\n");
/*
* assume the HW is wedged and just free up everything.
*/
list_splice_init(&priv->cm.rx_flush_list, &list);
list_splice_init(&priv->cm.rx_error_list, &list);
list_splice_init(&priv->cm.rx_drain_list, &list);
break;
}
spin_unlock_irq(&priv->lock);
msleep(1);
spin_lock_irq(&priv->lock);
}
list_splice_init(&priv->cm.rx_reap_list, &list);
spin_unlock_irq(&priv->lock);
list_for_each_entry_safe(p, n, &list, list) {
ib_destroy_cm_id(p->id);
ib_destroy_qp(p->qp);
kfree(p);
spin_lock_irq(&priv->lock);
}
spin_unlock_irq(&priv->lock);
ib_destroy_qp(priv->cm.rx_drain_qp);
cancel_delayed_work(&priv->cm.stale_task);
}
......@@ -1079,24 +1189,44 @@ void ipoib_cm_skb_too_long(struct net_device* dev, struct sk_buff *skb,
queue_work(ipoib_workqueue, &priv->cm.skb_task);
}
static void ipoib_cm_rx_reap(struct work_struct *work)
{
struct ipoib_dev_priv *priv = container_of(work, struct ipoib_dev_priv,
cm.rx_reap_task);
struct ipoib_cm_rx *p, *n;
LIST_HEAD(list);
spin_lock_irq(&priv->lock);
list_splice_init(&priv->cm.rx_reap_list, &list);
spin_unlock_irq(&priv->lock);
list_for_each_entry_safe(p, n, &list, list) {
ib_destroy_cm_id(p->id);
ib_destroy_qp(p->qp);
kfree(p);
}
}
static void ipoib_cm_stale_task(struct work_struct *work)
{
struct ipoib_dev_priv *priv = container_of(work, struct ipoib_dev_priv,
cm.stale_task.work);
struct ipoib_cm_rx *p;
int ret;
spin_lock_irq(&priv->lock);
while (!list_empty(&priv->cm.passive_ids)) {
/* List if sorted by LRU, start from tail,
/* List is sorted by LRU, start from tail,
* stop when we see a recently used entry */
p = list_entry(priv->cm.passive_ids.prev, typeof(*p), list);
if (time_before_eq(jiffies, p->jiffies + IPOIB_CM_RX_TIMEOUT))
break;
list_del_init(&p->list);
list_move(&p->list, &priv->cm.rx_error_list);
p->state = IPOIB_CM_RX_ERROR;
spin_unlock_irq(&priv->lock);
ib_destroy_cm_id(p->id);
ib_destroy_qp(p->qp);
kfree(p);
ret = ib_modify_qp(p->qp, &ipoib_cm_err_attr, IB_QP_STATE);
if (ret)
ipoib_warn(priv, "unable to move qp to error state: %d\n", ret);
spin_lock_irq(&priv->lock);
}
......@@ -1164,9 +1294,14 @@ int ipoib_cm_dev_init(struct net_device *dev)
INIT_LIST_HEAD(&priv->cm.passive_ids);
INIT_LIST_HEAD(&priv->cm.reap_list);
INIT_LIST_HEAD(&priv->cm.start_list);
INIT_LIST_HEAD(&priv->cm.rx_error_list);
INIT_LIST_HEAD(&priv->cm.rx_flush_list);
INIT_LIST_HEAD(&priv->cm.rx_drain_list);
INIT_LIST_HEAD(&priv->cm.rx_reap_list);
INIT_WORK(&priv->cm.start_task, ipoib_cm_tx_start);
INIT_WORK(&priv->cm.reap_task, ipoib_cm_tx_reap);
INIT_WORK(&priv->cm.skb_task, ipoib_cm_skb_reap);
INIT_WORK(&priv->cm.rx_reap_task, ipoib_cm_rx_reap);
INIT_DELAYED_WORK(&priv->cm.stale_task, ipoib_cm_stale_task);
skb_queue_head_init(&priv->cm.skb_queue);
......
......@@ -448,6 +448,13 @@ int ipoib_ib_dev_open(struct net_device *dev)
struct ipoib_dev_priv *priv = netdev_priv(dev);
int ret;
if (ib_find_pkey(priv->ca, priv->port, priv->pkey, &priv->pkey_index)) {
ipoib_warn(priv, "P_Key 0x%04x not found\n", priv->pkey);
clear_bit(IPOIB_PKEY_ASSIGNED, &priv->flags);
return -1;
}
set_bit(IPOIB_PKEY_ASSIGNED, &priv->flags);
ret = ipoib_init_qp(dev);
if (ret) {
ipoib_warn(priv, "ipoib_init_qp returned %d\n", ret);
......@@ -457,14 +464,14 @@ int ipoib_ib_dev_open(struct net_device *dev)
ret = ipoib_ib_post_receives(dev);
if (ret) {
ipoib_warn(priv, "ipoib_ib_post_receives returned %d\n", ret);
ipoib_ib_dev_stop(dev);
ipoib_ib_dev_stop(dev, 1);
return -1;
}
ret = ipoib_cm_dev_open(dev);
if (ret) {
ipoib_warn(priv, "ipoib_ib_post_receives returned %d\n", ret);
ipoib_ib_dev_stop(dev);
ipoib_warn(priv, "ipoib_cm_dev_open returned %d\n", ret);
ipoib_ib_dev_stop(dev, 1);
return -1;
}
......@@ -516,7 +523,7 @@ int ipoib_ib_dev_down(struct net_device *dev, int flush)
if (!test_bit(IPOIB_PKEY_ASSIGNED, &priv->flags)) {
mutex_lock(&pkey_mutex);
set_bit(IPOIB_PKEY_STOP, &priv->flags);
cancel_delayed_work(&priv->pkey_task);
cancel_delayed_work(&priv->pkey_poll_task);
mutex_unlock(&pkey_mutex);
if (flush)
flush_workqueue(ipoib_workqueue);
......@@ -543,7 +550,7 @@ static int recvs_pending(struct net_device *dev)
return pending;
}
int ipoib_ib_dev_stop(struct net_device *dev)
int ipoib_ib_dev_stop(struct net_device *dev, int flush)
{
struct ipoib_dev_priv *priv = netdev_priv(dev);
struct ib_qp_attr qp_attr;
......@@ -629,7 +636,8 @@ timeout:
/* Wait for all AHs to be reaped */
set_bit(IPOIB_STOP_REAPER, &priv->flags);
cancel_delayed_work(&priv->ah_reap_task);
flush_workqueue(ipoib_workqueue);
if (flush)
flush_workqueue(ipoib_workqueue);
begin = jiffies;
......@@ -673,13 +681,24 @@ int ipoib_ib_dev_init(struct net_device *dev, struct ib_device *ca, int port)
return 0;
}
void ipoib_ib_dev_flush(struct work_struct *work)
static void __ipoib_ib_dev_flush(struct ipoib_dev_priv *priv, int pkey_event)
{
struct ipoib_dev_priv *cpriv, *priv =
container_of(work, struct ipoib_dev_priv, flush_task);
struct ipoib_dev_priv *cpriv;
struct net_device *dev = priv->dev;
u16 new_index;
mutex_lock(&priv->vlan_mutex);
if (!test_bit(IPOIB_FLAG_INITIALIZED, &priv->flags) ) {
/*
* Flush any child interfaces too -- they might be up even if
* the parent is down.
*/
list_for_each_entry(cpriv, &priv->child_intfs, list)
__ipoib_ib_dev_flush(cpriv, pkey_event);
mutex_unlock(&priv->vlan_mutex);
if (!test_bit(IPOIB_FLAG_INITIALIZED, &priv->flags)) {
ipoib_dbg(priv, "Not flushing - IPOIB_FLAG_INITIALIZED not set.\n");
return;
}
......@@ -689,10 +708,32 @@ void ipoib_ib_dev_flush(struct work_struct *work)
return;
}
if (pkey_event) {
if (ib_find_pkey(priv->ca, priv->port, priv->pkey, &new_index)) {
clear_bit(IPOIB_PKEY_ASSIGNED, &priv->flags);
ipoib_ib_dev_down(dev, 0);
ipoib_pkey_dev_delay_open(dev);
return;
}
set_bit(IPOIB_PKEY_ASSIGNED, &priv->flags);
/* restart QP only if P_Key index is changed */
if (new_index == priv->pkey_index) {
ipoib_dbg(priv, "Not flushing - P_Key index not changed.\n");
return;
}
priv->pkey_index = new_index;
}
ipoib_dbg(priv, "flushing\n");
ipoib_ib_dev_down(dev, 0);
if (pkey_event) {
ipoib_ib_dev_stop(dev, 0);
ipoib_ib_dev_open(dev);
}
/*
* The device could have been brought down between the start and when
* we get here, don't bring it back up if it's not configured up
......@@ -701,14 +742,24 @@ void ipoib_ib_dev_flush(struct work_struct *work)
ipoib_ib_dev_up(dev);
ipoib_mcast_restart_task(&priv->restart_task);
}
}
mutex_lock(&priv->vlan_mutex);
void ipoib_ib_dev_flush(struct work_struct *work)
{
struct ipoib_dev_priv *priv =
container_of(work, struct ipoib_dev_priv, flush_task);
/* Flush any child interfaces too */
list_for_each_entry(cpriv, &priv->child_intfs, list)
ipoib_ib_dev_flush(&cpriv->flush_task);
ipoib_dbg(priv, "Flushing %s\n", priv->dev->name);
__ipoib_ib_dev_flush(priv, 0);
}
mutex_unlock(&priv->vlan_mutex);
void ipoib_pkey_event(struct work_struct *work)
{
struct ipoib_dev_priv *priv =
container_of(work, struct ipoib_dev_priv, pkey_event_task);
ipoib_dbg(priv, "Flushing %s and restarting its QP\n", priv->dev->name);
__ipoib_ib_dev_flush(priv, 1);
}
void ipoib_ib_dev_cleanup(struct net_device *dev)
......@@ -736,7 +787,7 @@ void ipoib_ib_dev_cleanup(struct net_device *dev)
void ipoib_pkey_poll(struct work_struct *work)
{
struct ipoib_dev_priv *priv =
container_of(work, struct ipoib_dev_priv, pkey_task.work);
container_of(work, struct ipoib_dev_priv, pkey_poll_task.work);
struct net_device *dev = priv->dev;
ipoib_pkey_dev_check_presence(dev);
......@@ -747,7 +798,7 @@ void ipoib_pkey_poll(struct work_struct *work)
mutex_lock(&pkey_mutex);
if (!test_bit(IPOIB_PKEY_STOP, &priv->flags))
queue_delayed_work(ipoib_workqueue,
&priv->pkey_task,
&priv->pkey_poll_task,
HZ);
mutex_unlock(&pkey_mutex);
}
......@@ -766,7 +817,7 @@ int ipoib_pkey_dev_delay_open(struct net_device *dev)
mutex_lock(&pkey_mutex);
clear_bit(IPOIB_PKEY_STOP, &priv->flags);
queue_delayed_work(ipoib_workqueue,
&priv->pkey_task,
&priv->pkey_poll_task,
HZ);
mutex_unlock(&pkey_mutex);
return 1;
......
......@@ -107,7 +107,7 @@ int ipoib_open(struct net_device *dev)
return -EINVAL;
if (ipoib_ib_dev_up(dev)) {
ipoib_ib_dev_stop(dev);
ipoib_ib_dev_stop(dev, 1);
return -EINVAL;
}
......@@ -152,7 +152,7 @@ static int ipoib_stop(struct net_device *dev)
flush_workqueue(ipoib_workqueue);
ipoib_ib_dev_down(dev, 1);
ipoib_ib_dev_stop(dev);
ipoib_ib_dev_stop(dev, 1);
if (!test_bit(IPOIB_FLAG_SUBINTERFACE, &priv->flags)) {
struct ipoib_dev_priv *cpriv;
......@@ -988,7 +988,8 @@ static void ipoib_setup(struct net_device *dev)
INIT_LIST_HEAD(&priv->dead_ahs);
INIT_LIST_HEAD(&priv->multicast_list);
INIT_DELAYED_WORK(&priv->pkey_task, ipoib_pkey_poll);
INIT_DELAYED_WORK(&priv->pkey_poll_task, ipoib_pkey_poll);
INIT_WORK(&priv->pkey_event_task, ipoib_pkey_event);
INIT_DELAYED_WORK(&priv->mcast_task, ipoib_mcast_join_task);
INIT_WORK(&priv->flush_task, ipoib_ib_dev_flush);
INIT_WORK(&priv->restart_task, ipoib_mcast_restart_task);
......
......@@ -524,7 +524,7 @@ void ipoib_mcast_join_task(struct work_struct *work)
return;
if (ib_query_gid(priv->ca, priv->port, 0, &priv->local_gid))
ipoib_warn(priv, "ib_gid_entry_get() failed\n");
ipoib_warn(priv, "ib_query_gid() failed\n");
else
memcpy(priv->dev->dev_addr + 4, priv->local_gid.raw, sizeof (union ib_gid));
......
......@@ -33,8 +33,6 @@
* $Id: ipoib_verbs.c 1349 2004-12-16 21:09:43Z roland $
*/
#include <rdma/ib_cache.h>
#include "ipoib.h"
int ipoib_mcast_attach(struct net_device *dev, u16 mlid, union ib_gid *mgid)
......@@ -49,7 +47,7 @@ int ipoib_mcast_attach(struct net_device *dev, u16 mlid, union ib_gid *mgid)
if (!qp_attr)
goto out;
if (ib_find_cached_pkey(priv->ca, priv->port, priv->pkey, &pkey_index)) {
if (ib_find_pkey(priv->ca, priv->port, priv->pkey, &pkey_index)) {
clear_bit(IPOIB_PKEY_ASSIGNED, &priv->flags);
ret = -ENXIO;
goto out;
......@@ -94,26 +92,16 @@ int ipoib_init_qp(struct net_device *dev)
{
struct ipoib_dev_priv *priv = netdev_priv(dev);
int ret;
u16 pkey_index;
struct ib_qp_attr qp_attr;
int attr_mask;
/*
* Search through the port P_Key table for the requested pkey value.
* The port has to be assigned to the respective IB partition in
* advance.
*/
ret = ib_find_cached_pkey(priv->ca, priv->port, priv->pkey, &pkey_index);
if (ret) {
clear_bit(IPOIB_PKEY_ASSIGNED, &priv->flags);
return ret;
}
set_bit(IPOIB_PKEY_ASSIGNED, &priv->flags);
if (!test_bit(IPOIB_PKEY_ASSIGNED, &priv->flags))
return -1;
qp_attr.qp_state = IB_QPS_INIT;
qp_attr.qkey = 0;
qp_attr.port_num = priv->port;
qp_attr.pkey_index = pkey_index;
qp_attr.pkey_index = priv->pkey_index;
attr_mask =
IB_QP_QKEY |
IB_QP_PORT |
......@@ -185,7 +173,7 @@ int ipoib_transport_dev_init(struct net_device *dev, struct ib_device *ca)
size = ipoib_sendq_size + ipoib_recvq_size + 1;
ret = ipoib_cm_dev_init(dev);
if (!ret)
size += ipoib_recvq_size;
size += ipoib_recvq_size + 1 /* 1 extra for rx_drain_qp */;
priv->cq = ib_create_cq(priv->ca, ipoib_ib_completion, NULL, dev, size, 0);
if (IS_ERR(priv->cq)) {
......@@ -259,14 +247,18 @@ void ipoib_event(struct ib_event_handler *handler,
struct ipoib_dev_priv *priv =
container_of(handler, struct ipoib_dev_priv, event_handler);
if ((record->event == IB_EVENT_PORT_ERR ||
record->event == IB_EVENT_PKEY_CHANGE ||
record->event == IB_EVENT_PORT_ACTIVE ||
record->event == IB_EVENT_LID_CHANGE ||
record->event == IB_EVENT_SM_CHANGE ||
record->event == IB_EVENT_CLIENT_REREGISTER) &&
record->element.port_num == priv->port) {
if (record->element.port_num != priv->port)
return;
if (record->event == IB_EVENT_PORT_ERR ||
record->event == IB_EVENT_PORT_ACTIVE ||
record->event == IB_EVENT_LID_CHANGE ||
record->event == IB_EVENT_SM_CHANGE ||
record->event == IB_EVENT_CLIENT_REREGISTER) {
ipoib_dbg(priv, "Port state change event\n");
queue_work(ipoib_workqueue, &priv->flush_task);
} else if (record->event == IB_EVENT_PKEY_CHANGE) {
ipoib_dbg(priv, "P_Key change event on port:%d\n", priv->port);
queue_work(ipoib_workqueue, &priv->pkey_event_task);
}
}
......@@ -90,7 +90,7 @@ static void dump_dev_cap_flags(struct mlx4_dev *dev, u32 flags)
int i;
mlx4_dbg(dev, "DEV_CAP flags:\n");
for (i = 0; i < 32; ++i)
for (i = 0; i < ARRAY_SIZE(fname); ++i)
if (fname[i] && (flags & (1 << i)))
mlx4_dbg(dev, " %s\n", fname[i]);
}
......
......@@ -890,6 +890,8 @@ struct ib_device {
spinlock_t client_data_lock;
struct ib_cache cache;
int *pkey_tbl_len;
int *gid_tbl_len;
u32 flags;
......@@ -1118,6 +1120,12 @@ int ib_modify_port(struct ib_device *device,
u8 port_num, int port_modify_mask,
struct ib_port_modify *port_modify);
int ib_find_gid(struct ib_device *device, union ib_gid *gid,
u8 *port_num, u16 *index);
int ib_find_pkey(struct ib_device *device,
u8 port_num, u16 pkey, u16 *index);
/**
* ib_alloc_pd - Allocates an unused protection domain.
* @device: The device on which to allocate the protection domain.
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment