Commit 0f973f27 authored by Jesse Barnes's avatar Jesse Barnes Committed by Dave Airlie

drm/i915: add fence register management to execbuf

Adds code to set up fence registers at execbuf time on pre-965 chips as
necessary.  Also fixes up a few bugs in the pre-965 tile register support
(get_order != ffs).  The number of fences available to the kernel defaults
to the hw limit minus 3 (for legacy X front/back/depth), but a new parameter
allows userspace to override that as needed.
Signed-off-by: default avatarJesse Barnes <jbarnes@virtuousgeek.org>
Signed-off-by: default avatarEric Anholt <eric@anholt.net>
Signed-off-by: default avatarDave Airlie <airlied@linux.ie>
parent d9ddcb96
...@@ -731,6 +731,9 @@ static int i915_getparam(struct drm_device *dev, void *data, ...@@ -731,6 +731,9 @@ static int i915_getparam(struct drm_device *dev, void *data,
case I915_PARAM_HAS_GEM: case I915_PARAM_HAS_GEM:
value = dev_priv->has_gem; value = dev_priv->has_gem;
break; break;
case I915_PARAM_NUM_FENCES_AVAIL:
value = dev_priv->num_fence_regs - dev_priv->fence_reg_start;
break;
default: default:
DRM_ERROR("Unknown parameter %d\n", param->param); DRM_ERROR("Unknown parameter %d\n", param->param);
return -EINVAL; return -EINVAL;
...@@ -764,6 +767,13 @@ static int i915_setparam(struct drm_device *dev, void *data, ...@@ -764,6 +767,13 @@ static int i915_setparam(struct drm_device *dev, void *data,
case I915_SETPARAM_ALLOW_BATCHBUFFER: case I915_SETPARAM_ALLOW_BATCHBUFFER:
dev_priv->allow_batchbuffer = param->value; dev_priv->allow_batchbuffer = param->value;
break; break;
case I915_SETPARAM_NUM_USED_FENCES:
if (param->value > dev_priv->num_fence_regs ||
param->value < 0)
return -EINVAL;
/* Userspace can use first N regs */
dev_priv->fence_reg_start = param->value;
break;
default: default:
DRM_ERROR("unknown parameter %d\n", param->param); DRM_ERROR("unknown parameter %d\n", param->param);
return -EINVAL; return -EINVAL;
......
...@@ -602,6 +602,7 @@ int i915_gem_init_object(struct drm_gem_object *obj); ...@@ -602,6 +602,7 @@ int i915_gem_init_object(struct drm_gem_object *obj);
void i915_gem_free_object(struct drm_gem_object *obj); void i915_gem_free_object(struct drm_gem_object *obj);
int i915_gem_object_pin(struct drm_gem_object *obj, uint32_t alignment); int i915_gem_object_pin(struct drm_gem_object *obj, uint32_t alignment);
void i915_gem_object_unpin(struct drm_gem_object *obj); void i915_gem_object_unpin(struct drm_gem_object *obj);
int i915_gem_object_unbind(struct drm_gem_object *obj);
void i915_gem_lastclose(struct drm_device *dev); void i915_gem_lastclose(struct drm_device *dev);
uint32_t i915_get_gem_seqno(struct drm_device *dev); uint32_t i915_get_gem_seqno(struct drm_device *dev);
void i915_gem_retire_requests(struct drm_device *dev); void i915_gem_retire_requests(struct drm_device *dev);
...@@ -785,6 +786,11 @@ extern int i915_wait_ring(struct drm_device * dev, int n, const char *caller); ...@@ -785,6 +786,11 @@ extern int i915_wait_ring(struct drm_device * dev, int n, const char *caller);
IS_I945GM(dev) || IS_I965GM(dev) || IS_GM45(dev)) IS_I945GM(dev) || IS_I965GM(dev) || IS_GM45(dev))
#define I915_NEED_GFX_HWS(dev) (IS_G33(dev) || IS_GM45(dev) || IS_G4X(dev)) #define I915_NEED_GFX_HWS(dev) (IS_G33(dev) || IS_GM45(dev) || IS_G4X(dev))
/* With the 945 and later, Y tiling got adjusted so that it was 32 128-byte
* rows, which changed the alignment requirements and fence programming.
*/
#define HAS_128_BYTE_Y_TILING(dev) (IS_I9XX(dev) && !(IS_I915G(dev) || \
IS_I915GM(dev)))
#define SUPPORTS_INTEGRATED_HDMI(dev) (IS_G4X(dev)) #define SUPPORTS_INTEGRATED_HDMI(dev) (IS_G4X(dev))
#define PRIMARY_RINGBUFFER_SIZE (128*1024) #define PRIMARY_RINGBUFFER_SIZE (128*1024)
......
...@@ -52,7 +52,7 @@ static void i915_gem_object_free_page_list(struct drm_gem_object *obj); ...@@ -52,7 +52,7 @@ static void i915_gem_object_free_page_list(struct drm_gem_object *obj);
static int i915_gem_object_wait_rendering(struct drm_gem_object *obj); static int i915_gem_object_wait_rendering(struct drm_gem_object *obj);
static int i915_gem_object_bind_to_gtt(struct drm_gem_object *obj, static int i915_gem_object_bind_to_gtt(struct drm_gem_object *obj,
unsigned alignment); unsigned alignment);
static int i915_gem_object_get_fence_reg(struct drm_gem_object *obj); static int i915_gem_object_get_fence_reg(struct drm_gem_object *obj, bool write);
static void i915_gem_clear_fence_reg(struct drm_gem_object *obj); static void i915_gem_clear_fence_reg(struct drm_gem_object *obj);
static int i915_gem_evict_something(struct drm_device *dev); static int i915_gem_evict_something(struct drm_device *dev);
static int i915_gem_phys_pwrite(struct drm_device *dev, struct drm_gem_object *obj, static int i915_gem_phys_pwrite(struct drm_device *dev, struct drm_gem_object *obj,
...@@ -567,6 +567,7 @@ int i915_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf) ...@@ -567,6 +567,7 @@ int i915_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
pgoff_t page_offset; pgoff_t page_offset;
unsigned long pfn; unsigned long pfn;
int ret = 0; int ret = 0;
bool write = !!(vmf->flags & FAULT_FLAG_WRITE);
/* We don't use vmf->pgoff since that has the fake offset */ /* We don't use vmf->pgoff since that has the fake offset */
page_offset = ((unsigned long)vmf->virtual_address - vma->vm_start) >> page_offset = ((unsigned long)vmf->virtual_address - vma->vm_start) >>
...@@ -586,7 +587,7 @@ int i915_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf) ...@@ -586,7 +587,7 @@ int i915_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
/* Need a new fence register? */ /* Need a new fence register? */
if (obj_priv->fence_reg == I915_FENCE_REG_NONE && if (obj_priv->fence_reg == I915_FENCE_REG_NONE &&
obj_priv->tiling_mode != I915_TILING_NONE) { obj_priv->tiling_mode != I915_TILING_NONE) {
ret = i915_gem_object_get_fence_reg(obj); ret = i915_gem_object_get_fence_reg(obj, write);
if (ret != 0) if (ret != 0)
return VM_FAULT_SIGBUS; return VM_FAULT_SIGBUS;
} }
...@@ -1214,7 +1215,7 @@ i915_gem_object_wait_rendering(struct drm_gem_object *obj) ...@@ -1214,7 +1215,7 @@ i915_gem_object_wait_rendering(struct drm_gem_object *obj)
/** /**
* Unbinds an object from the GTT aperture. * Unbinds an object from the GTT aperture.
*/ */
static int int
i915_gem_object_unbind(struct drm_gem_object *obj) i915_gem_object_unbind(struct drm_gem_object *obj)
{ {
struct drm_device *dev = obj->dev; struct drm_device *dev = obj->dev;
...@@ -1448,21 +1449,26 @@ static void i915_write_fence_reg(struct drm_i915_fence_reg *reg) ...@@ -1448,21 +1449,26 @@ static void i915_write_fence_reg(struct drm_i915_fence_reg *reg)
drm_i915_private_t *dev_priv = dev->dev_private; drm_i915_private_t *dev_priv = dev->dev_private;
struct drm_i915_gem_object *obj_priv = obj->driver_private; struct drm_i915_gem_object *obj_priv = obj->driver_private;
int regnum = obj_priv->fence_reg; int regnum = obj_priv->fence_reg;
int tile_width;
uint32_t val; uint32_t val;
uint32_t pitch_val; uint32_t pitch_val;
if ((obj_priv->gtt_offset & ~I915_FENCE_START_MASK) || if ((obj_priv->gtt_offset & ~I915_FENCE_START_MASK) ||
(obj_priv->gtt_offset & (obj->size - 1))) { (obj_priv->gtt_offset & (obj->size - 1))) {
WARN(1, "%s: object not 1M or size aligned\n", __func__); WARN(1, "%s: object 0x%08x not 1M or size (0x%x) aligned\n",
__func__, obj_priv->gtt_offset, obj->size);
return; return;
} }
if (obj_priv->tiling_mode == I915_TILING_Y && (IS_I945G(dev) || if (obj_priv->tiling_mode == I915_TILING_Y &&
IS_I945GM(dev) || HAS_128_BYTE_Y_TILING(dev))
IS_G33(dev))) tile_width = 128;
pitch_val = (obj_priv->stride / 128) - 1;
else else
pitch_val = (obj_priv->stride / 512) - 1; tile_width = 512;
/* Note: pitch better be a power of two tile widths */
pitch_val = obj_priv->stride / tile_width;
pitch_val = ffs(pitch_val) - 1;
val = obj_priv->gtt_offset; val = obj_priv->gtt_offset;
if (obj_priv->tiling_mode == I915_TILING_Y) if (obj_priv->tiling_mode == I915_TILING_Y)
...@@ -1486,7 +1492,8 @@ static void i830_write_fence_reg(struct drm_i915_fence_reg *reg) ...@@ -1486,7 +1492,8 @@ static void i830_write_fence_reg(struct drm_i915_fence_reg *reg)
if ((obj_priv->gtt_offset & ~I915_FENCE_START_MASK) || if ((obj_priv->gtt_offset & ~I915_FENCE_START_MASK) ||
(obj_priv->gtt_offset & (obj->size - 1))) { (obj_priv->gtt_offset & (obj->size - 1))) {
WARN(1, "%s: object not 1M or size aligned\n", __func__); WARN(1, "%s: object 0x%08x not 1M or size aligned\n",
__func__, obj_priv->gtt_offset);
return; return;
} }
...@@ -1506,6 +1513,7 @@ static void i830_write_fence_reg(struct drm_i915_fence_reg *reg) ...@@ -1506,6 +1513,7 @@ static void i830_write_fence_reg(struct drm_i915_fence_reg *reg)
/** /**
* i915_gem_object_get_fence_reg - set up a fence reg for an object * i915_gem_object_get_fence_reg - set up a fence reg for an object
* @obj: object to map through a fence reg * @obj: object to map through a fence reg
* @write: object is about to be written
* *
* When mapping objects through the GTT, userspace wants to be able to write * When mapping objects through the GTT, userspace wants to be able to write
* to them without having to worry about swizzling if the object is tiled. * to them without having to worry about swizzling if the object is tiled.
...@@ -1517,7 +1525,7 @@ static void i830_write_fence_reg(struct drm_i915_fence_reg *reg) ...@@ -1517,7 +1525,7 @@ static void i830_write_fence_reg(struct drm_i915_fence_reg *reg)
* and tiling format. * and tiling format.
*/ */
static int static int
i915_gem_object_get_fence_reg(struct drm_gem_object *obj) i915_gem_object_get_fence_reg(struct drm_gem_object *obj, bool write)
{ {
struct drm_device *dev = obj->dev; struct drm_device *dev = obj->dev;
struct drm_i915_private *dev_priv = dev->dev_private; struct drm_i915_private *dev_priv = dev->dev_private;
...@@ -1530,12 +1538,18 @@ i915_gem_object_get_fence_reg(struct drm_gem_object *obj) ...@@ -1530,12 +1538,18 @@ i915_gem_object_get_fence_reg(struct drm_gem_object *obj)
WARN(1, "allocating a fence for non-tiled object?\n"); WARN(1, "allocating a fence for non-tiled object?\n");
break; break;
case I915_TILING_X: case I915_TILING_X:
WARN(obj_priv->stride & (512 - 1), if (!obj_priv->stride)
"object is X tiled but has non-512B pitch\n"); return -EINVAL;
WARN((obj_priv->stride & (512 - 1)),
"object 0x%08x is X tiled but has non-512B pitch\n",
obj_priv->gtt_offset);
break; break;
case I915_TILING_Y: case I915_TILING_Y:
WARN(obj_priv->stride & (128 - 1), if (!obj_priv->stride)
"object is Y tiled but has non-128B pitch\n"); return -EINVAL;
WARN((obj_priv->stride & (128 - 1)),
"object 0x%08x is Y tiled but has non-128B pitch\n",
obj_priv->gtt_offset);
break; break;
} }
...@@ -1637,7 +1651,7 @@ i915_gem_object_bind_to_gtt(struct drm_gem_object *obj, unsigned alignment) ...@@ -1637,7 +1651,7 @@ i915_gem_object_bind_to_gtt(struct drm_gem_object *obj, unsigned alignment)
if (dev_priv->mm.suspended) if (dev_priv->mm.suspended)
return -EBUSY; return -EBUSY;
if (alignment == 0) if (alignment == 0)
alignment = PAGE_SIZE; alignment = i915_gem_get_gtt_alignment(obj);
if (alignment & (PAGE_SIZE - 1)) { if (alignment & (PAGE_SIZE - 1)) {
DRM_ERROR("Invalid object alignment requested %u\n", alignment); DRM_ERROR("Invalid object alignment requested %u\n", alignment);
return -EINVAL; return -EINVAL;
...@@ -2658,6 +2672,14 @@ i915_gem_object_pin(struct drm_gem_object *obj, uint32_t alignment) ...@@ -2658,6 +2672,14 @@ i915_gem_object_pin(struct drm_gem_object *obj, uint32_t alignment)
DRM_ERROR("Failure to bind: %d", ret); DRM_ERROR("Failure to bind: %d", ret);
return ret; return ret;
} }
/*
* Pre-965 chips need a fence register set up in order to
* properly handle tiled surfaces.
*/
if (!IS_I965G(dev) &&
obj_priv->fence_reg == I915_FENCE_REG_NONE &&
obj_priv->tiling_mode != I915_TILING_NONE)
i915_gem_object_get_fence_reg(obj, true);
} }
obj_priv->pin_count++; obj_priv->pin_count++;
...@@ -3297,7 +3319,7 @@ i915_gem_load(struct drm_device *dev) ...@@ -3297,7 +3319,7 @@ i915_gem_load(struct drm_device *dev)
/* Old X drivers will take 0-2 for front, back, depth buffers */ /* Old X drivers will take 0-2 for front, back, depth buffers */
dev_priv->fence_reg_start = 3; dev_priv->fence_reg_start = 3;
if (IS_I965G(dev)) if (IS_I965G(dev) || IS_I945G(dev) || IS_I945GM(dev) || IS_G33(dev))
dev_priv->num_fence_regs = 16; dev_priv->num_fence_regs = 16;
else else
dev_priv->num_fence_regs = 8; dev_priv->num_fence_regs = 8;
......
...@@ -173,6 +173,73 @@ i915_gem_detect_bit_6_swizzle(struct drm_device *dev) ...@@ -173,6 +173,73 @@ i915_gem_detect_bit_6_swizzle(struct drm_device *dev)
dev_priv->mm.bit_6_swizzle_y = swizzle_y; dev_priv->mm.bit_6_swizzle_y = swizzle_y;
} }
/**
* Returns the size of the fence for a tiled object of the given size.
*/
static int
i915_get_fence_size(struct drm_device *dev, int size)
{
int i;
int start;
if (IS_I965G(dev)) {
/* The 965 can have fences at any page boundary. */
return ALIGN(size, 4096);
} else {
/* Align the size to a power of two greater than the smallest
* fence size.
*/
if (IS_I9XX(dev))
start = 1024 * 1024;
else
start = 512 * 1024;
for (i = start; i < size; i <<= 1)
;
return i;
}
}
/* Check pitch constriants for all chips & tiling formats */
static bool
i915_tiling_ok(struct drm_device *dev, int stride, int size, int tiling_mode)
{
int tile_width;
/* Linear is always fine */
if (tiling_mode == I915_TILING_NONE)
return true;
if (tiling_mode == I915_TILING_Y && HAS_128_BYTE_Y_TILING(dev))
tile_width = 128;
else
tile_width = 512;
/* 965+ just needs multiples of tile width */
if (IS_I965G(dev)) {
if (stride & (tile_width - 1))
return false;
return true;
}
/* Pre-965 needs power of two tile widths */
if (stride < tile_width)
return false;
if (stride & (stride - 1))
return false;
/* We don't handle the aperture area covered by the fence being bigger
* than the object size.
*/
if (i915_get_fence_size(dev, size) != size)
return false;
return true;
}
/** /**
* Sets the tiling mode of an object, returning the required swizzling of * Sets the tiling mode of an object, returning the required swizzling of
* bit 6 of addresses in the object. * bit 6 of addresses in the object.
...@@ -191,6 +258,9 @@ i915_gem_set_tiling(struct drm_device *dev, void *data, ...@@ -191,6 +258,9 @@ i915_gem_set_tiling(struct drm_device *dev, void *data,
return -EINVAL; return -EINVAL;
obj_priv = obj->driver_private; obj_priv = obj->driver_private;
if (!i915_tiling_ok(dev, args->stride, obj->size, args->tiling_mode))
return -EINVAL;
mutex_lock(&dev->struct_mutex); mutex_lock(&dev->struct_mutex);
if (args->tiling_mode == I915_TILING_NONE) { if (args->tiling_mode == I915_TILING_NONE) {
...@@ -207,7 +277,23 @@ i915_gem_set_tiling(struct drm_device *dev, void *data, ...@@ -207,7 +277,23 @@ i915_gem_set_tiling(struct drm_device *dev, void *data,
args->swizzle_mode = I915_BIT_6_SWIZZLE_NONE; args->swizzle_mode = I915_BIT_6_SWIZZLE_NONE;
} }
} }
obj_priv->tiling_mode = args->tiling_mode; if (args->tiling_mode != obj_priv->tiling_mode) {
int ret;
/* Unbind the object, as switching tiling means we're
* switching the cache organization due to fencing, probably.
*/
ret = i915_gem_object_unbind(obj);
if (ret != 0) {
WARN(ret != -ERESTARTSYS,
"failed to unbind object for tiling switch");
args->tiling_mode = obj_priv->tiling_mode;
mutex_unlock(&dev->struct_mutex);
return ret;
}
obj_priv->tiling_mode = args->tiling_mode;
}
obj_priv->stride = args->stride; obj_priv->stride = args->stride;
mutex_unlock(&dev->struct_mutex); mutex_unlock(&dev->struct_mutex);
......
...@@ -186,12 +186,12 @@ ...@@ -186,12 +186,12 @@
#define FENCE_REG_830_0 0x2000 #define FENCE_REG_830_0 0x2000
#define I830_FENCE_START_MASK 0x07f80000 #define I830_FENCE_START_MASK 0x07f80000
#define I830_FENCE_TILING_Y_SHIFT 12 #define I830_FENCE_TILING_Y_SHIFT 12
#define I830_FENCE_SIZE_BITS(size) ((get_order(size >> 19) - 1) << 8) #define I830_FENCE_SIZE_BITS(size) ((ffs((size) >> 19) - 1) << 8)
#define I830_FENCE_PITCH_SHIFT 4 #define I830_FENCE_PITCH_SHIFT 4
#define I830_FENCE_REG_VALID (1<<0) #define I830_FENCE_REG_VALID (1<<0)
#define I915_FENCE_START_MASK 0x0ff00000 #define I915_FENCE_START_MASK 0x0ff00000
#define I915_FENCE_SIZE_BITS(size) ((get_order(size >> 20) - 1) << 8) #define I915_FENCE_SIZE_BITS(size) ((ffs((size) >> 20) - 1) << 8)
#define FENCE_REG_965_0 0x03000 #define FENCE_REG_965_0 0x03000
#define I965_FENCE_PITCH_SHIFT 2 #define I965_FENCE_PITCH_SHIFT 2
......
...@@ -261,6 +261,7 @@ typedef struct drm_i915_irq_wait { ...@@ -261,6 +261,7 @@ typedef struct drm_i915_irq_wait {
#define I915_PARAM_LAST_DISPATCH 3 #define I915_PARAM_LAST_DISPATCH 3
#define I915_PARAM_CHIPSET_ID 4 #define I915_PARAM_CHIPSET_ID 4
#define I915_PARAM_HAS_GEM 5 #define I915_PARAM_HAS_GEM 5
#define I915_PARAM_NUM_FENCES_AVAIL 6
typedef struct drm_i915_getparam { typedef struct drm_i915_getparam {
int param; int param;
...@@ -272,6 +273,7 @@ typedef struct drm_i915_getparam { ...@@ -272,6 +273,7 @@ typedef struct drm_i915_getparam {
#define I915_SETPARAM_USE_MI_BATCHBUFFER_START 1 #define I915_SETPARAM_USE_MI_BATCHBUFFER_START 1
#define I915_SETPARAM_TEX_LRU_LOG_GRANULARITY 2 #define I915_SETPARAM_TEX_LRU_LOG_GRANULARITY 2
#define I915_SETPARAM_ALLOW_BATCHBUFFER 3 #define I915_SETPARAM_ALLOW_BATCHBUFFER 3
#define I915_SETPARAM_NUM_USED_FENCES 4
typedef struct drm_i915_setparam { typedef struct drm_i915_setparam {
int param; int param;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment