Commit d7bb58fb authored by Jack Morgenstein, committed by Roland Dreier

mlx4_core: Write MTTs from CPU instead of with WRITE_MTT FW command

Write MTT entries directly to ICM from the driver (eliminating use of
WRITE_MTT command).  This reduces the number of FW commands needed to
register an MR by at least a factor of 2 and speeds up memory
registration significantly.  This code will also be used to implement
FMRs.
Signed-off-by: Jack Morgenstein <jackm@dev.mellanox.co.il>
Signed-off-by: Michael S. Tsirkin <mst@dev.mellanox.co.il>
Signed-off-by: Roland Dreier <rolandd@cisco.com>
parent 121964ec
...@@ -96,11 +96,10 @@ int mlx4_ib_umem_write_mtt(struct mlx4_ib_dev *dev, struct mlx4_mtt *mtt, ...@@ -96,11 +96,10 @@ int mlx4_ib_umem_write_mtt(struct mlx4_ib_dev *dev, struct mlx4_mtt *mtt,
pages[i++] = sg_dma_address(&chunk->page_list[j]) + pages[i++] = sg_dma_address(&chunk->page_list[j]) +
umem->page_size * k; umem->page_size * k;
/* /*
* Be friendly to WRITE_MTT firmware * Be friendly to mlx4_write_mtt() and
* command, and pass it chunks of * pass it chunks of appropriate size.
* appropriate size.
*/ */
if (i == PAGE_SIZE / sizeof (u64) - 2) { if (i == PAGE_SIZE / sizeof (u64)) {
err = mlx4_write_mtt(dev->dev, mtt, n, err = mlx4_write_mtt(dev->dev, mtt, n,
i, pages); i, pages);
if (err) if (err)
......
...@@ -301,9 +301,9 @@ void mlx4_table_put(struct mlx4_dev *dev, struct mlx4_icm_table *table, int obj) ...@@ -301,9 +301,9 @@ void mlx4_table_put(struct mlx4_dev *dev, struct mlx4_icm_table *table, int obj)
mutex_unlock(&table->mutex); mutex_unlock(&table->mutex);
} }
void *mlx4_table_find(struct mlx4_icm_table *table, int obj) void *mlx4_table_find(struct mlx4_icm_table *table, int obj, dma_addr_t *dma_handle)
{ {
int idx, offset, i; int idx, offset, dma_offset, i;
struct mlx4_icm_chunk *chunk; struct mlx4_icm_chunk *chunk;
struct mlx4_icm *icm; struct mlx4_icm *icm;
struct page *page = NULL; struct page *page = NULL;
...@@ -313,15 +313,26 @@ void *mlx4_table_find(struct mlx4_icm_table *table, int obj) ...@@ -313,15 +313,26 @@ void *mlx4_table_find(struct mlx4_icm_table *table, int obj)
mutex_lock(&table->mutex); mutex_lock(&table->mutex);
idx = obj & (table->num_obj - 1); idx = (obj & (table->num_obj - 1)) * table->obj_size;
icm = table->icm[idx / (MLX4_TABLE_CHUNK_SIZE / table->obj_size)]; icm = table->icm[idx / MLX4_TABLE_CHUNK_SIZE];
offset = idx % (MLX4_TABLE_CHUNK_SIZE / table->obj_size); dma_offset = offset = idx % MLX4_TABLE_CHUNK_SIZE;
if (!icm) if (!icm)
goto out; goto out;
list_for_each_entry(chunk, &icm->chunk_list, list) { list_for_each_entry(chunk, &icm->chunk_list, list) {
for (i = 0; i < chunk->npages; ++i) { for (i = 0; i < chunk->npages; ++i) {
if (dma_handle && dma_offset >= 0) {
if (sg_dma_len(&chunk->mem[i]) > dma_offset)
*dma_handle = sg_dma_address(&chunk->mem[i]) +
dma_offset;
dma_offset -= sg_dma_len(&chunk->mem[i]);
}
/*
* DMA mapping can merge pages but not split them,
* so if we found the page, dma_handle has already
* been assigned to.
*/
if (chunk->mem[i].length > offset) { if (chunk->mem[i].length > offset) {
page = chunk->mem[i].page; page = chunk->mem[i].page;
goto out; goto out;
......
...@@ -83,7 +83,7 @@ int mlx4_init_icm_table(struct mlx4_dev *dev, struct mlx4_icm_table *table, ...@@ -83,7 +83,7 @@ int mlx4_init_icm_table(struct mlx4_dev *dev, struct mlx4_icm_table *table,
void mlx4_cleanup_icm_table(struct mlx4_dev *dev, struct mlx4_icm_table *table); void mlx4_cleanup_icm_table(struct mlx4_dev *dev, struct mlx4_icm_table *table);
int mlx4_table_get(struct mlx4_dev *dev, struct mlx4_icm_table *table, int obj); int mlx4_table_get(struct mlx4_dev *dev, struct mlx4_icm_table *table, int obj);
void mlx4_table_put(struct mlx4_dev *dev, struct mlx4_icm_table *table, int obj); void mlx4_table_put(struct mlx4_dev *dev, struct mlx4_icm_table *table, int obj);
void *mlx4_table_find(struct mlx4_icm_table *table, int obj); void *mlx4_table_find(struct mlx4_icm_table *table, int obj, dma_addr_t *dma_handle);
int mlx4_table_get_range(struct mlx4_dev *dev, struct mlx4_icm_table *table, int mlx4_table_get_range(struct mlx4_dev *dev, struct mlx4_icm_table *table,
int start, int end); int start, int end);
void mlx4_table_put_range(struct mlx4_dev *dev, struct mlx4_icm_table *table, void mlx4_table_put_range(struct mlx4_dev *dev, struct mlx4_icm_table *table,
......
...@@ -300,6 +300,17 @@ static int __devinit mlx4_init_icm(struct mlx4_dev *dev, ...@@ -300,6 +300,17 @@ static int __devinit mlx4_init_icm(struct mlx4_dev *dev,
goto err_unmap_cmpt; goto err_unmap_cmpt;
} }
/*
* Reserved MTT entries must be aligned up to a cacheline
* boundary, since the FW will write to them, while the driver
* writes to all other MTT entries. (The variable
* dev->caps.mtt_entry_sz below is really the MTT segment
* size, not the raw entry size)
*/
dev->caps.reserved_mtts =
ALIGN(dev->caps.reserved_mtts * dev->caps.mtt_entry_sz,
dma_get_cache_alignment()) / dev->caps.mtt_entry_sz;
err = mlx4_init_icm_table(dev, &priv->mr_table.mtt_table, err = mlx4_init_icm_table(dev, &priv->mr_table.mtt_table,
init_hca->mtt_base, init_hca->mtt_base,
dev->caps.mtt_entry_sz, dev->caps.mtt_entry_sz,
......
...@@ -349,58 +349,57 @@ err_table: ...@@ -349,58 +349,57 @@ err_table:
} }
EXPORT_SYMBOL_GPL(mlx4_mr_enable); EXPORT_SYMBOL_GPL(mlx4_mr_enable);
static int mlx4_WRITE_MTT(struct mlx4_dev *dev, struct mlx4_cmd_mailbox *mailbox, static int mlx4_write_mtt_chunk(struct mlx4_dev *dev, struct mlx4_mtt *mtt,
int num_mtt)
{
return mlx4_cmd(dev, mailbox->dma, num_mtt, 0, MLX4_CMD_WRITE_MTT,
MLX4_CMD_TIME_CLASS_B);
}
int mlx4_write_mtt(struct mlx4_dev *dev, struct mlx4_mtt *mtt,
int start_index, int npages, u64 *page_list) int start_index, int npages, u64 *page_list)
{ {
struct mlx4_cmd_mailbox *mailbox; struct mlx4_priv *priv = mlx4_priv(dev);
__be64 *mtt_entry; __be64 *mtts;
dma_addr_t dma_handle;
int i; int i;
int err = 0; int s = start_index * sizeof (u64);
if (mtt->order < 0) /* All MTTs must fit in the same page */
if (start_index / (PAGE_SIZE / sizeof (u64)) !=
(start_index + npages - 1) / (PAGE_SIZE / sizeof (u64)))
return -EINVAL; return -EINVAL;
mailbox = mlx4_alloc_cmd_mailbox(dev); if (start_index & (MLX4_MTT_ENTRY_PER_SEG - 1))
if (IS_ERR(mailbox)) return -EINVAL;
return PTR_ERR(mailbox);
mtt_entry = mailbox->buf; mtts = mlx4_table_find(&priv->mr_table.mtt_table, mtt->first_seg +
s / dev->caps.mtt_entry_sz, &dma_handle);
if (!mtts)
return -ENOMEM;
while (npages > 0) { for (i = 0; i < npages; ++i)
mtt_entry[0] = cpu_to_be64(mlx4_mtt_addr(dev, mtt) + start_index * 8); mtts[i] = cpu_to_be64(page_list[i] | MLX4_MTT_FLAG_PRESENT);
mtt_entry[1] = 0;
for (i = 0; i < npages && i < MLX4_MAILBOX_SIZE / 8 - 2; ++i) dma_sync_single(&dev->pdev->dev, dma_handle, npages * sizeof (u64), DMA_TO_DEVICE);
mtt_entry[i + 2] = cpu_to_be64(page_list[i] |
MLX4_MTT_FLAG_PRESENT);
/* return 0;
* If we have an odd number of entries to write, add }
* one more dummy entry for firmware efficiency.
*/
if (i & 1)
mtt_entry[i + 2] = 0;
err = mlx4_WRITE_MTT(dev, mailbox, (i + 1) & ~1); int mlx4_write_mtt(struct mlx4_dev *dev, struct mlx4_mtt *mtt,
int start_index, int npages, u64 *page_list)
{
int chunk;
int err;
if (mtt->order < 0)
return -EINVAL;
while (npages > 0) {
chunk = min_t(int, PAGE_SIZE / sizeof(u64), npages);
err = mlx4_write_mtt_chunk(dev, mtt, start_index, chunk, page_list);
if (err) if (err)
goto out; return err;
npages -= i; npages -= chunk;
start_index += i; start_index += chunk;
page_list += i; page_list += chunk;
} }
out: return 0;
mlx4_free_cmd_mailbox(dev, mailbox);
return err;
} }
EXPORT_SYMBOL_GPL(mlx4_write_mtt); EXPORT_SYMBOL_GPL(mlx4_write_mtt);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment