diff --git a/i965_drv_video/i965_render.c b/i965_drv_video/i965_render.c
index 2215d269df685a2a1e2e4497d0221ee4bd8b24d6..8789ca88b2e4c45194b3314976fe402f811b8475 100644
--- a/i965_drv_video/i965_render.c
+++ b/i965_drv_video/i965_render.c
@@ -1265,7 +1265,10 @@ i965_clear_dest_region(VADriverContextP ctx)
 
     br13 |= pitch;
 
-    BEGIN_BATCH(ctx, 6);
+    if (IS_GEN6(i965->intel.device_id))
+        BEGIN_BLT_BATCH(ctx, 6);
+    else
+        BEGIN_BATCH(ctx, 6);
     OUT_BATCH(ctx, blt_cmd);
     OUT_BATCH(ctx, br13);
     OUT_BATCH(ctx, (dest_region->y << 16) | (dest_region->x));
@@ -1281,9 +1284,9 @@ i965_clear_dest_region(VADriverContextP ctx)
 static void
 i965_surface_render_pipeline_setup(VADriverContextP ctx)
 {
+    i965_clear_dest_region(ctx);
     intel_batchbuffer_start_atomic(ctx, 0x1000);
     intel_batchbuffer_emit_mi_flush(ctx);
-    i965_clear_dest_region(ctx);
     i965_render_pipeline_select(ctx);
     i965_render_state_sip(ctx);
     i965_render_state_base_address(ctx);
@@ -1914,6 +1917,7 @@ gen6_render_put_surface(VADriverContextP ctx,
     gen6_render_setup_states(ctx, surface,
                              srcx, srcy, srcw, srch,
                              destx, desty, destw, desth);
+    i965_clear_dest_region(ctx);
     gen6_render_emit_states(ctx, PS_KERNEL);
     intel_batchbuffer_flush(ctx);
 }
diff --git a/i965_drv_video/intel_batchbuffer.c b/i965_drv_video/intel_batchbuffer.c
index 87c2abc431daa6dab2bcef965a9a06a7ca5f5ad2..4988e9cfd46b908b2b88a48fc318773febff4fed 100644
--- a/i965_drv_video/intel_batchbuffer.c
+++ b/i965_drv_video/intel_batchbuffer.c
@@ -40,6 +40,7 @@ intel_batchbuffer_reset(struct intel_batchbuffer *batch)
     int batch_size = BATCH_SIZE;
 
     assert(batch->flag == I915_EXEC_RENDER ||
+           batch->flag == I915_EXEC_BLT ||
            batch->flag == I915_EXEC_BSD);
 
     dri_bo_unreference(batch->buffer);
@@ -284,9 +285,20 @@ intel_batchbuffer_data_bcs(VADriverContextP ctx, void *data, unsigned int size)
 void
 intel_batchbuffer_emit_mi_flush(VADriverContextP ctx)
 {
-    BEGIN_BATCH(ctx, 1);
-    OUT_BATCH(ctx, MI_FLUSH | MI_FLUSH_STATE_INSTRUCTION_CACHE_INVALIDATE);
-    ADVANCE_BATCH(ctx);
+    struct intel_driver_data *intel = intel_driver_data(ctx);
+    /* Emit the flush command that matches the current flag of intel->batch. */
+    if (intel->batch->flag == I915_EXEC_BLT) { /* four dwords: MI_FLUSH_DW followed by three zeros */
+        BEGIN_BLT_BATCH(ctx, 4);
+        OUT_BATCH(ctx, MI_FLUSH_DW);
+        OUT_BATCH(ctx, 0);
+        OUT_BATCH(ctx, 0);
+        OUT_BATCH(ctx, 0);
+        ADVANCE_BATCH(ctx);
+    } else if (intel->batch->flag == I915_EXEC_RENDER) { /* one dword: MI_FLUSH with the invalidate bit */
+        BEGIN_BATCH(ctx, 1);
+        OUT_BATCH(ctx, MI_FLUSH | MI_FLUSH_STATE_INSTRUCTION_CACHE_INVALIDATE);
+        ADVANCE_BATCH(ctx);
+    } /* any other flag value: nothing is emitted */
 }
 
 void
@@ -322,7 +334,7 @@ void
 intel_batchbuffer_start_atomic(VADriverContextP ctx, unsigned int size)
 {
     struct intel_driver_data *intel = intel_driver_data(ctx);
-
+    intel_batchbuffer_check_batchbuffer_flag(ctx, I915_EXEC_RENDER); /* set the batch flag to RENDER before calling the atomic helper */
     intel_batchbuffer_start_atomic_helper(ctx, intel->batch, size);
 }
 
@@ -400,3 +412,20 @@ intel_batchbuffer_advance_batch_bcs(VADriverContextP ctx)
 
    intel_batchbuffer_advance_batch_helper(intel->batch_bcs);
 }
+
+void
+intel_batchbuffer_check_batchbuffer_flag(VADriverContextP ctx, int flag)
+{
+    /* Move intel->batch to `flag`, calling the flush helper first when the flag changes. */
+    struct intel_driver_data *intel = intel_driver_data(ctx);
+    int is_known_flag = (flag == I915_EXEC_RENDER ||
+                         flag == I915_EXEC_BLT ||
+                         flag == I915_EXEC_BSD);
+
+    /* Unrecognized or unchanged flag: leave the batch alone. */
+    if (!is_known_flag || intel->batch->flag == flag)
+        return;
+
+    intel_batchbuffer_flush_helper(ctx, intel->batch);
+    intel->batch->flag = flag;
+}
diff --git a/i965_drv_video/intel_batchbuffer.h b/i965_drv_video/intel_batchbuffer.h
index 7c8e6f51dabf7424ac38861887c637e6828b54b4..25652e10307e93788caa72b7023c8d57dcfc400e 100644
--- a/i965_drv_video/intel_batchbuffer.h
+++ b/i965_drv_video/intel_batchbuffer.h
@@ -57,10 +57,16 @@ Bool intel_batchbuffer_flush_bcs(VADriverContextP ctx);
 void intel_batchbuffer_begin_batch_bcs(VADriverContextP ctx, int total);
 void intel_batchbuffer_advance_batch_bcs(VADriverContextP ctx);
 
-#define BEGIN_BATCH(ctx, n) do {                                \
-   intel_batchbuffer_require_space(ctx, (n) * 4);               \
-   intel_batchbuffer_begin_batch(ctx, (n));                     \
-} while (0)
+void intel_batchbuffer_check_batchbuffer_flag(VADriverContextP ctx, int flag);
+/* Select `flag` for the batch, require n * 4 bytes of space, then begin an n-entry batch. */
+#define __BEGIN_BATCH(ctx, n, flag) do {                        \
+        intel_batchbuffer_check_batchbuffer_flag(ctx, flag);    \
+        intel_batchbuffer_require_space(ctx, (n) * 4);          \
+        intel_batchbuffer_begin_batch(ctx, (n));                \
+    } while (0)
+/* BEGIN_BATCH selects I915_EXEC_RENDER; BEGIN_BLT_BATCH selects I915_EXEC_BLT. */
+#define BEGIN_BATCH(ctx, n)             __BEGIN_BATCH(ctx, n, I915_EXEC_RENDER)
+#define BEGIN_BLT_BATCH(ctx, n)         __BEGIN_BATCH(ctx, n, I915_EXEC_BLT)
 
 #define OUT_BATCH(ctx, d) do {                                  \
    intel_batchbuffer_emit_dword(ctx, d);                        \