Commit a8474ce2 authored by Jens Axboe

SCSI: support for allocating large scatterlists

This is what enables large commands. If we need to allocate an
sgtable that doesn't fit in a single page, allocate several
SCSI_MAX_SG_SEGMENTS sized tables and chain them together.

SCSI defaults to large chained sg tables, if the arch supports it.
Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
parent 0cde8d95
...@@ -36,33 +36,19 @@ ...@@ -36,33 +36,19 @@
struct scsi_host_sg_pool { struct scsi_host_sg_pool {
size_t size; size_t size;
char *name; char *name;
struct kmem_cache *slab; struct kmem_cache *slab;
mempool_t *pool; mempool_t *pool;
}; };
#if (SCSI_MAX_PHYS_SEGMENTS < 32) #define SP(x) { x, "sgpool-" #x }
#error SCSI_MAX_PHYS_SEGMENTS is too small
#endif
#define SP(x) { x, "sgpool-" #x }
static struct scsi_host_sg_pool scsi_sg_pools[] = { static struct scsi_host_sg_pool scsi_sg_pools[] = {
SP(8), SP(8),
SP(16), SP(16),
SP(32), SP(32),
#if (SCSI_MAX_PHYS_SEGMENTS > 32)
SP(64), SP(64),
#if (SCSI_MAX_PHYS_SEGMENTS > 64)
SP(128), SP(128),
#if (SCSI_MAX_PHYS_SEGMENTS > 128) };
SP(256),
#if (SCSI_MAX_PHYS_SEGMENTS > 256)
#error SCSI_MAX_PHYS_SEGMENTS is too large
#endif
#endif
#endif
#endif
};
#undef SP #undef SP
static void scsi_run_queue(struct request_queue *q); static void scsi_run_queue(struct request_queue *q);
...@@ -698,45 +684,126 @@ static struct scsi_cmnd *scsi_end_request(struct scsi_cmnd *cmd, int uptodate, ...@@ -698,45 +684,126 @@ static struct scsi_cmnd *scsi_end_request(struct scsi_cmnd *cmd, int uptodate,
return NULL; return NULL;
} }
struct scatterlist *scsi_alloc_sgtable(struct scsi_cmnd *cmd, gfp_t gfp_mask) /*
{ * The maximum number of SG segments that we will put inside a scatterlist
struct scsi_host_sg_pool *sgp; * (unless chaining is used). Should ideally fit inside a single page, to
struct scatterlist *sgl; * avoid a higher order allocation.
*/
#define SCSI_MAX_SG_SEGMENTS 128
BUG_ON(!cmd->use_sg); /*
* Like SCSI_MAX_SG_SEGMENTS, but for archs that have sg chaining. This limit
* is totally arbitrary, a setting of 2048 will get you at least 8mb ios.
*/
#define SCSI_MAX_SG_CHAIN_SEGMENTS 2048
switch (cmd->use_sg) { static inline unsigned int scsi_sgtable_index(unsigned short nents)
{
unsigned int index;
switch (nents) {
case 1 ... 8: case 1 ... 8:
cmd->sglist_len = 0; index = 0;
break; break;
case 9 ... 16: case 9 ... 16:
cmd->sglist_len = 1; index = 1;
break; break;
case 17 ... 32: case 17 ... 32:
cmd->sglist_len = 2; index = 2;
break; break;
#if (SCSI_MAX_PHYS_SEGMENTS > 32)
case 33 ... 64: case 33 ... 64:
cmd->sglist_len = 3; index = 3;
break; break;
#if (SCSI_MAX_PHYS_SEGMENTS > 64) case 65 ... SCSI_MAX_SG_SEGMENTS:
case 65 ... 128: index = 4;
cmd->sglist_len = 4;
break; break;
#if (SCSI_MAX_PHYS_SEGMENTS > 128)
case 129 ... 256:
cmd->sglist_len = 5;
break;
#endif
#endif
#endif
default: default:
return NULL; printk(KERN_ERR "scsi: bad segment count=%d\n", nents);
BUG();
} }
sgp = scsi_sg_pools + cmd->sglist_len; return index;
sgl = mempool_alloc(sgp->pool, gfp_mask); }
return sgl;
/*
 * scsi_alloc_sgtable - allocate the scatterlist for a command
 * @cmd:	command to allocate for; ->use_sg must be non-zero and holds
 *		the number of segments required
 * @gfp_mask:	allocation flags for the first table. Any further tables
 *		are allocated with __GFP_WAIT cleared and __GFP_HIGH set.
 *
 * A request of at most SCSI_MAX_SG_SEGMENTS entries is served by a single
 * table from the pool selected by scsi_sgtable_index(). A larger request is
 * built from several tables of the largest pool, each accounting for
 * SCSI_MAX_SG_SEGMENTS - 1 segments and linked to the next with sg_chain(),
 * followed by one table sized for the remainder.
 *
 * On success, cmd->sglist_len is set to the pool index of the first table,
 * cmd->__use_sg records the segment count for the free path, and the first
 * table is returned. On allocation failure every table obtained so far is
 * given back to its pool and NULL is returned.
 */
struct scatterlist *scsi_alloc_sgtable(struct scsi_cmnd *cmd, gfp_t gfp_mask)
{
	struct scsi_host_sg_pool *sgp;
	struct scatterlist *sgl, *prev, *ret;
	unsigned int index;
	int this, left;

	BUG_ON(!cmd->use_sg);

	left = cmd->use_sg;
	ret = prev = NULL;
	do {
		this = left;
		if (this > SCSI_MAX_SG_SEGMENTS) {
			/*
			 * more tables will follow: use the biggest pool and
			 * account one entry less, the last slot of this table
			 * carries the chain link.
			 */
			this = SCSI_MAX_SG_SEGMENTS - 1;
			index = SG_MEMPOOL_NR - 1;
		} else
			index = scsi_sgtable_index(this);

		left -= this;

		sgp = scsi_sg_pools + index;

		sgl = mempool_alloc(sgp->pool, gfp_mask);
		if (unlikely(!sgl))
			goto enomem;

		/*
		 * zero the whole table; the cleanup path below relies on the
		 * chain slot of the last table reading back as NULL.
		 */
		memset(sgl, 0, sizeof(*sgl) * sgp->size);

		/*
		 * first loop through, set initial index and return value
		 */
		if (!ret) {
			cmd->sglist_len = index;
			ret = sgl;
		}

		/*
		 * chain previous sglist, if any. we know the previous
		 * sglist must be the biggest one, or we would not have
		 * ended up doing another loop.
		 */
		if (prev)
			sg_chain(prev, SCSI_MAX_SG_SEGMENTS, sgl);

		/*
		 * don't allow subsequent mempool allocs to sleep, it would
		 * violate the mempool principle.
		 */
		gfp_mask &= ~__GFP_WAIT;
		gfp_mask |= __GFP_HIGH;
		prev = sgl;
	} while (left);

	/*
	 * ->use_sg may get modified after dma mapping has potentially
	 * shrunk the number of segments, so keep a copy of it for free.
	 */
	cmd->__use_sg = cmd->use_sg;
	return ret;
enomem:
	if (ret) {
		/*
		 * Free ret and everything chained off it. Since we were
		 * trying to allocate another sglist, we know that all
		 * entries are of the max size.
		 */
		sgp = scsi_sg_pools + SG_MEMPOOL_NR - 1;
		sgl = ret;
		while (sgl) {
			struct scatterlist *next;

			/*
			 * pick up the link to the next table before this one
			 * goes back to the pool; it must not be read from
			 * freed memory.
			 */
			next = sg_chain_ptr(&sgl[SCSI_MAX_SG_SEGMENTS - 1]);
			mempool_free(sgl, sgp->pool);
			sgl = next;
		}
	}
	return NULL;
}
EXPORT_SYMBOL(scsi_alloc_sgtable); EXPORT_SYMBOL(scsi_alloc_sgtable);
...@@ -748,6 +815,42 @@ void scsi_free_sgtable(struct scsi_cmnd *cmd) ...@@ -748,6 +815,42 @@ void scsi_free_sgtable(struct scsi_cmnd *cmd)
BUG_ON(cmd->sglist_len >= SG_MEMPOOL_NR); BUG_ON(cmd->sglist_len >= SG_MEMPOOL_NR);
/*
* if this is the biggest size sglist, check if we have
* chained parts we need to free
*/
if (cmd->__use_sg > SCSI_MAX_SG_SEGMENTS) {
unsigned short this, left;
struct scatterlist *next;
unsigned int index;
left = cmd->__use_sg - (SCSI_MAX_SG_SEGMENTS - 1);
next = sg_chain_ptr(&sgl[SCSI_MAX_SG_SEGMENTS - 1]);
while (left && next) {
sgl = next;
this = left;
if (this > SCSI_MAX_SG_SEGMENTS) {
this = SCSI_MAX_SG_SEGMENTS - 1;
index = SG_MEMPOOL_NR - 1;
} else
index = scsi_sgtable_index(this);
left -= this;
sgp = scsi_sg_pools + index;
if (left)
next = sg_chain_ptr(&sgl[sgp->size - 1]);
mempool_free(sgl, sgp->pool);
}
/*
* Restore original, will be freed below
*/
sgl = cmd->request_buffer;
}
sgp = scsi_sg_pools + cmd->sglist_len; sgp = scsi_sg_pools + cmd->sglist_len;
mempool_free(sgl, sgp->pool); mempool_free(sgl, sgp->pool);
} }
...@@ -988,7 +1091,6 @@ void scsi_io_completion(struct scsi_cmnd *cmd, unsigned int good_bytes) ...@@ -988,7 +1091,6 @@ void scsi_io_completion(struct scsi_cmnd *cmd, unsigned int good_bytes)
static int scsi_init_io(struct scsi_cmnd *cmd) static int scsi_init_io(struct scsi_cmnd *cmd)
{ {
struct request *req = cmd->request; struct request *req = cmd->request;
struct scatterlist *sgpnt;
int count; int count;
/* /*
...@@ -1001,14 +1103,13 @@ static int scsi_init_io(struct scsi_cmnd *cmd) ...@@ -1001,14 +1103,13 @@ static int scsi_init_io(struct scsi_cmnd *cmd)
/* /*
* If sg table allocation fails, requeue request later. * If sg table allocation fails, requeue request later.
*/ */
sgpnt = scsi_alloc_sgtable(cmd, GFP_ATOMIC); cmd->request_buffer = scsi_alloc_sgtable(cmd, GFP_ATOMIC);
if (unlikely(!sgpnt)) { if (unlikely(!cmd->request_buffer)) {
scsi_unprep_request(req); scsi_unprep_request(req);
return BLKPREP_DEFER; return BLKPREP_DEFER;
} }
req->buffer = NULL; req->buffer = NULL;
cmd->request_buffer = (char *) sgpnt;
if (blk_pc_request(req)) if (blk_pc_request(req))
cmd->request_bufflen = req->data_len; cmd->request_bufflen = req->data_len;
else else
...@@ -1533,8 +1634,22 @@ struct request_queue *__scsi_alloc_queue(struct Scsi_Host *shost, ...@@ -1533,8 +1634,22 @@ struct request_queue *__scsi_alloc_queue(struct Scsi_Host *shost,
if (!q) if (!q)
return NULL; return NULL;
/*
* this limit is imposed by hardware restrictions
*/
blk_queue_max_hw_segments(q, shost->sg_tablesize); blk_queue_max_hw_segments(q, shost->sg_tablesize);
blk_queue_max_phys_segments(q, SCSI_MAX_PHYS_SEGMENTS);
/*
* In the future, sg chaining support will be mandatory and this
* ifdef can then go away. Right now we don't have all archs
* converted, so better keep it safe.
*/
#ifdef ARCH_HAS_SG_CHAIN
blk_queue_max_phys_segments(q, SCSI_MAX_SG_CHAIN_SEGMENTS);
#else
blk_queue_max_phys_segments(q, SCSI_MAX_SG_SEGMENTS);
#endif
blk_queue_max_sectors(q, shost->max_sectors); blk_queue_max_sectors(q, shost->max_sectors);
blk_queue_bounce_limit(q, scsi_calculate_bounce_limit(shost)); blk_queue_bounce_limit(q, scsi_calculate_bounce_limit(shost));
blk_queue_segment_boundary(q, shost->dma_boundary); blk_queue_segment_boundary(q, shost->dma_boundary);
......
...@@ -10,13 +10,6 @@ ...@@ -10,13 +10,6 @@
#include <linux/types.h> #include <linux/types.h>
/*
* The maximum sg list length SCSI can cope with
* (currently must be a power of 2 between 32 and 256)
*/
#define SCSI_MAX_PHYS_SEGMENTS MAX_PHYS_SEGMENTS
/* /*
* SCSI command lengths * SCSI command lengths
*/ */
......
...@@ -70,6 +70,7 @@ struct scsi_cmnd { ...@@ -70,6 +70,7 @@ struct scsi_cmnd {
/* These elements define the operation we ultimately want to perform */ /* These elements define the operation we ultimately want to perform */
unsigned short use_sg; /* Number of pieces of scatter-gather */ unsigned short use_sg; /* Number of pieces of scatter-gather */
unsigned short sglist_len; /* size of malloc'd scatter-gather list */ unsigned short sglist_len; /* size of malloc'd scatter-gather list */
unsigned short __use_sg;
unsigned underflow; /* Return error if less than unsigned underflow; /* Return error if less than
this amount is transferred */ this amount is transferred */
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment