Commit a6f23657 authored by Jens Axboe

block: add one-hit cache for disk partition lookup

disk_map_sector_rcu() returns a partition from a sector offset,
which we use for IO statistics on a per-partition basis. The
lookup itself is an O(N) list lookup, where N is the number of
partitions. This actually hurts performance quite a bit, even
on the lower-numbered partitions. On higher-numbered partitions,
it can get pretty bad.

Solve this by adding a one-hit cache for partition lookup.
This makes the lookup O(1) for the case where we do most IO to
one partition. Even for mixed partition workloads, amortized cost
is pretty close to O(1) since the natural IO batching makes the
one-hit cache last for lots of IOs.
Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
parent 30e0dc28
...@@ -181,6 +181,12 @@ void disk_part_iter_exit(struct disk_part_iter *piter) ...@@ -181,6 +181,12 @@ void disk_part_iter_exit(struct disk_part_iter *piter)
} }
EXPORT_SYMBOL_GPL(disk_part_iter_exit); EXPORT_SYMBOL_GPL(disk_part_iter_exit);
/*
 * sector_in_part - test whether a sector lies inside a partition
 * @part: partition whose start_sect/nr_sects bound the range
 * @sector: sector number to test
 *
 * The range is half-open: start_sect is included, and
 * start_sect + nr_sects is the first sector past the end.
 *
 * Returns 1 if @sector is within the range, 0 otherwise.
 */
static inline int sector_in_part(struct hd_struct *part, sector_t sector)
{
	/* Anything before the first sector of the partition is outside. */
	if (sector < part->start_sect)
		return 0;

	return sector < part->start_sect + part->nr_sects;
}
/** /**
* disk_map_sector_rcu - map sector to partition * disk_map_sector_rcu - map sector to partition
* @disk: gendisk of interest * @disk: gendisk of interest
...@@ -199,16 +205,22 @@ EXPORT_SYMBOL_GPL(disk_part_iter_exit); ...@@ -199,16 +205,22 @@ EXPORT_SYMBOL_GPL(disk_part_iter_exit);
struct hd_struct *disk_map_sector_rcu(struct gendisk *disk, sector_t sector) struct hd_struct *disk_map_sector_rcu(struct gendisk *disk, sector_t sector)
{ {
struct disk_part_tbl *ptbl; struct disk_part_tbl *ptbl;
struct hd_struct *part;
int i; int i;
ptbl = rcu_dereference(disk->part_tbl); ptbl = rcu_dereference(disk->part_tbl);
part = rcu_dereference(ptbl->last_lookup);
if (part && sector_in_part(part, sector))
return part;
for (i = 1; i < ptbl->len; i++) { for (i = 1; i < ptbl->len; i++) {
struct hd_struct *part = rcu_dereference(ptbl->part[i]); part = rcu_dereference(ptbl->part[i]);
if (part && part->start_sect <= sector && if (part && sector_in_part(part, sector)) {
sector < part->start_sect + part->nr_sects) rcu_assign_pointer(ptbl->last_lookup, part);
return part; return part;
}
} }
return &disk->part0; return &disk->part0;
} }
...@@ -888,8 +900,11 @@ static void disk_replace_part_tbl(struct gendisk *disk, ...@@ -888,8 +900,11 @@ static void disk_replace_part_tbl(struct gendisk *disk,
struct disk_part_tbl *old_ptbl = disk->part_tbl; struct disk_part_tbl *old_ptbl = disk->part_tbl;
rcu_assign_pointer(disk->part_tbl, new_ptbl); rcu_assign_pointer(disk->part_tbl, new_ptbl);
if (old_ptbl)
if (old_ptbl) {
rcu_assign_pointer(old_ptbl->last_lookup, NULL);
call_rcu(&old_ptbl->rcu_head, disk_free_ptbl_rcu_cb); call_rcu(&old_ptbl->rcu_head, disk_free_ptbl_rcu_cb);
}
} }
/** /**
......
...@@ -126,6 +126,7 @@ struct blk_scsi_cmd_filter { ...@@ -126,6 +126,7 @@ struct blk_scsi_cmd_filter {
struct disk_part_tbl { struct disk_part_tbl {
struct rcu_head rcu_head; struct rcu_head rcu_head;
int len; int len;
struct hd_struct *last_lookup;
struct hd_struct *part[]; struct hd_struct *part[];
}; };
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment