Commit 25bc6b07 authored by Vivek Goyal, committed by Jens Axboe

blkio: Introduce per cfq group weights and vdisktime calculations

o Bring in the per cfq group weight and the vdisktime calculation for the
  group. Also bring in the functionality for updating the min_vdisktime of
  the group service tree.

Signed-off-by: Vivek Goyal <vgoyal@redhat.com>
Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
parent 31e4c28d
block/Kconfig.iosched

@@ -23,7 +23,6 @@ config IOSCHED_DEADLINE
 
 config IOSCHED_CFQ
 	tristate "CFQ I/O scheduler"
-	select BLK_CGROUP
 	default y
 	---help---
 	  The CFQ I/O scheduler tries to distribute bandwidth equally
@@ -33,6 +32,14 @@ config IOSCHED_CFQ
 
 	  This is the default I/O scheduler.
 
+config CFQ_GROUP_IOSCHED
+	bool "CFQ Group Scheduling support"
+	depends on IOSCHED_CFQ && CGROUPS
+	select BLK_CGROUP
+	default n
+	---help---
+	  Enable group IO scheduling in CFQ.
+
 choice
 	prompt "Default I/O scheduler"
 	default DEFAULT_CFQ
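The select of BLK_CGROUP moves from IOSCHED_CFQ itself to the new option, so the blkio controller is only built when group scheduling is actually requested. As a sketch of the resulting configuration (assuming standard Kconfig select semantics), a kernel that wants CFQ group scheduling ends up with:

CONFIG_CGROUPS=y
CONFIG_IOSCHED_CFQ=y
CONFIG_CFQ_GROUP_IOSCHED=y
CONFIG_BLK_CGROUP=y   # selected automatically by CFQ_GROUP_IOSCHED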
block/cfq-iosched.c

@@ -13,6 +13,7 @@
 #include <linux/rbtree.h>
 #include <linux/ioprio.h>
 #include <linux/blktrace_api.h>
+#include "blk-cgroup.h"
 
 /*
  * tunables
@@ -49,6 +50,7 @@ static const int cfq_hist_divisor = 4;
 
 #define CFQ_SLICE_SCALE		(5)
 #define CFQ_HW_QUEUE_MIN	(5)
+#define CFQ_SERVICE_SHIFT	12
 
 #define RQ_CIC(rq)		\
 	((struct cfq_io_context *) (rq)->elevator_private)
@@ -79,6 +81,7 @@ struct cfq_rb_root {
 	struct rb_node *left;
 	unsigned count;
 	u64 min_vdisktime;
+	struct rb_node *active;
 };
 #define CFQ_RB_ROOT	(struct cfq_rb_root) { RB_ROOT, NULL, 0, 0, }
@@ -163,6 +166,7 @@ struct cfq_group {
 	/* group service_tree key */
 	u64 vdisktime;
+	unsigned int weight;
 	bool on_st;
 
 	/* number of cfqq currently on this group */
@@ -434,6 +438,51 @@ cfq_prio_to_slice(struct cfq_data *cfqd, struct cfq_queue *cfqq)
 	return cfq_prio_slice(cfqd, cfq_cfqq_sync(cfqq), cfqq->ioprio);
 }
 
+static inline u64 cfq_scale_slice(unsigned long delta, struct cfq_group *cfqg)
+{
+	u64 d = delta << CFQ_SERVICE_SHIFT;
+
+	d = d * BLKIO_WEIGHT_DEFAULT;
+	do_div(d, cfqg->weight);
+	return d;
+}
+
+static inline u64 max_vdisktime(u64 min_vdisktime, u64 vdisktime)
+{
+	s64 delta = (s64)(vdisktime - min_vdisktime);
+	if (delta > 0)
+		min_vdisktime = vdisktime;
+
+	return min_vdisktime;
+}
+
+static inline u64 min_vdisktime(u64 min_vdisktime, u64 vdisktime)
+{
+	s64 delta = (s64)(vdisktime - min_vdisktime);
+	if (delta < 0)
+		min_vdisktime = vdisktime;
+
+	return min_vdisktime;
+}
+
+static void update_min_vdisktime(struct cfq_rb_root *st)
+{
+	u64 vdisktime = st->min_vdisktime;
+	struct cfq_group *cfqg;
+
+	if (st->active) {
+		cfqg = rb_entry_cfqg(st->active);
+		vdisktime = cfqg->vdisktime;
+	}
+
+	if (st->left) {
+		cfqg = rb_entry_cfqg(st->left);
+		vdisktime = min_vdisktime(vdisktime, cfqg->vdisktime);
+	}
+
+	st->min_vdisktime = max_vdisktime(st->min_vdisktime, vdisktime);
+}
+
 /*
  * get averaged number of queues of RT/BE priority.
  * average is updated, with a formula that gives more weight to higher numbers,
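The weight machinery is all in cfq_scale_slice(): the service a group received is shifted up by CFQ_SERVICE_SHIFT for resolution, then scaled by BLKIO_WEIGHT_DEFAULT/weight, so a heavier group accumulates vdisktime more slowly and reaches the front of the service tree more often. A minimal userspace sketch of the same arithmetic (scale_slice() is a hypothetical stand-in for cfq_scale_slice(); it assumes BLKIO_WEIGHT_DEFAULT is 500, the default the blkio controller patch this builds on defines):

#include <stdio.h>
#include <stdint.h>

#define CFQ_SERVICE_SHIFT	12
#define BLKIO_WEIGHT_DEFAULT	500	/* assumed value from blk-cgroup.h */

/* Userspace restatement of cfq_scale_slice(): the vdisktime charge is
 * inversely proportional to the group's weight. */
static uint64_t scale_slice(unsigned long delta, unsigned int weight)
{
	uint64_t d = (uint64_t)delta << CFQ_SERVICE_SHIFT;
	return d * BLKIO_WEIGHT_DEFAULT / weight;	/* do_div() in the kernel */
}

int main(void)
{
	/* The same 100-jiffy slice charged to groups of different weights. */
	printf("weight  500: %llu\n", (unsigned long long)scale_slice(100, 500));
	printf("weight 1000: %llu\n", (unsigned long long)scale_slice(100, 1000));
	printf("weight  100: %llu\n", (unsigned long long)scale_slice(100, 100));
	return 0;
}

A weight-1000 group is charged half the vdisktime of a weight-500 group for the same slice, so it wins the leftmost position in the group service tree twice as often.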
@@ -734,8 +783,12 @@ cfq_group_service_tree_del(struct cfq_data *cfqd, struct cfq_group *cfqg)
 {
 	struct cfq_rb_root *st = &cfqd->grp_service_tree;
 
+	if (st->active == &cfqg->rb_node)
+		st->active = NULL;
+
 	BUG_ON(cfqg->nr_cfqq < 1);
 	cfqg->nr_cfqq--;
+
 	/* If there are other cfq queues under this group, don't delete it */
 	if (cfqg->nr_cfqq)
 		return;
@@ -1654,10 +1707,14 @@ static void choose_service_tree(struct cfq_data *cfqd, struct cfq_group *cfqg)
 static struct cfq_group *cfq_get_next_cfqg(struct cfq_data *cfqd)
 {
 	struct cfq_rb_root *st = &cfqd->grp_service_tree;
+	struct cfq_group *cfqg;
 
 	if (RB_EMPTY_ROOT(&st->rb))
 		return NULL;
-	return cfq_rb_first_group(st);
+	cfqg = cfq_rb_first_group(st);
+	st->active = &cfqg->rb_node;
+	update_min_vdisktime(st);
+	return cfqg;
 }
 
 static void cfq_choose_cfqg(struct cfq_data *cfqd)
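cfq_get_next_cfqg() now marks the chosen group as st->active and pulls st->min_vdisktime forward past it. The helpers only ever move min_vdisktime forward, and they compare through a signed difference so the ordering survives wraparound of the u64 virtual clock. A standalone sketch of that idiom (this max_vdisktime() mirrors the kernel helper added above):

#include <stdio.h>
#include <stdint.h>

/* Signed-difference comparison: correct even across u64 wraparound,
 * as long as the two values are within 2^63 of each other. */
static uint64_t max_vdisktime(uint64_t min_vdisktime, uint64_t vdisktime)
{
	int64_t delta = (int64_t)(vdisktime - min_vdisktime);
	return delta > 0 ? vdisktime : min_vdisktime;
}

int main(void)
{
	uint64_t before_wrap = UINT64_MAX - 5;	/* clock just before wrapping */
	uint64_t after_wrap  = 10;		/* clock just after wrapping  */

	/* A naive "after_wrap > before_wrap" check fails here; the signed
	 * delta correctly treats the wrapped value as the later one. */
	printf("max = %llu (expect 10)\n",
	       (unsigned long long)max_vdisktime(before_wrap, after_wrap));
	return 0;
}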
@@ -3150,6 +3207,9 @@ static void *cfq_init_queue(struct request_queue *q)
 	*st = CFQ_RB_ROOT;
 	RB_CLEAR_NODE(&cfqg->rb_node);
 
+	/* Give preference to root group over other groups */
+	cfqg->weight = 2*BLKIO_WEIGHT_DEFAULT;
+
 	/*
 	 * Not strictly needed (since RB_ROOT just clears the node and we
 	 * zeroed cfqd on alloc), but better be safe in case someone decides
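Worked through cfq_scale_slice() with the assumed default weight of 500: the root group starts at weight 2*500 = 1000, so a slice of delta jiffies charges it (delta << 12) * 500 / 1000, half of what a default-weight group pays for the same service. Its vdisktime therefore grows at half speed, which is the preference the comment above refers to.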