Commit 8f5697e9 authored by Philipp Reisner

Removed drbd_io_error(), which predated the worker-based after_state_ch() [Bugz 224]

Besides removing some lines of code, this also removes a deadlock.
Signed-off-by: Philipp Reisner <philipp.reisner@linbit.com>
Signed-off-by: Lars Ellenberg <lars.ellenberg@linbit.com>
parent ff940fd4
......@@ -349,10 +349,8 @@ w_al_write_transaction(struct drbd_conf *mdev, struct drbd_work *w, int unused)
sector = mdev->ldev->md.md_offset
+ mdev->ldev->md.al_offset + mdev->al_tr_pos;
if (!drbd_md_sync_page_io(mdev, mdev->ldev, sector, WRITE)) {
if (!drbd_md_sync_page_io(mdev, mdev->ldev, sector, WRITE))
drbd_chk_io_error(mdev, 1, TRUE);
drbd_io_error(mdev, TRUE);
}
if (++mdev->al_tr_pos >
div_ceil(mdev->act_log->nr_elements, AL_EXTENTS_PT))
......@@ -542,7 +540,6 @@ static void atodb_endio(struct bio *bio, int error)
if (!error && !uptodate)
error = -EIO;
/* corresponding drbd_io_error is in drbd_al_to_on_disk_bm */
drbd_chk_io_error(mdev, error, TRUE);
if (error && wc->error == 0)
wc->error = error;
......@@ -715,8 +712,6 @@ void drbd_al_to_on_disk_bm(struct drbd_conf *mdev)
put_ldev(mdev);
if (wc.error)
drbd_io_error(mdev, TRUE);
kfree(bios);
return;
......
......@@ -838,7 +838,6 @@ static int bm_rw(struct drbd_conf *mdev, int rw) __must_hold(local)
if (test_bit(BM_MD_IO_ERROR, &b->bm_flags)) {
dev_alert(DEV, "we had at least one MD IO ERROR during bitmap IO\n");
drbd_chk_io_error(mdev, 1, TRUE);
drbd_io_error(mdev, TRUE);
err = -EIO;
}
......@@ -911,7 +910,6 @@ int drbd_bm_write_sect(struct drbd_conf *mdev, unsigned long enr) __must_hold(lo
"(meta-disk sector %llus)\n",
enr, (unsigned long long)on_disk_sector);
drbd_chk_io_error(mdev, 1, TRUE);
drbd_io_error(mdev, TRUE);
for (i = 0; i < AL_EXT_PER_BM_SECT; i++)
drbd_bm_ALe_set_all(mdev, enr*AL_EXT_PER_BM_SECT+i);
}
......
......@@ -1145,7 +1145,6 @@ extern int drbd_send_bitmap(struct drbd_conf *mdev);
extern int _drbd_send_bitmap(struct drbd_conf *mdev);
extern int drbd_send_sr_reply(struct drbd_conf *mdev, int retcode);
extern void drbd_free_bc(struct drbd_backing_dev *ldev);
extern int drbd_io_error(struct drbd_conf *mdev, int forcedetach);
extern void drbd_mdev_cleanup(struct drbd_conf *mdev);
/* drbd_meta-data.c (still in drbd_main.c) */
......@@ -1653,7 +1652,7 @@ static inline void __drbd_chk_io_error(struct drbd_conf *mdev, int forcedetach)
* @error: Error code passed to the IO completion callback
* @forcedetach: Force detach. I.e. the error happened while accessing the meta data
*
* See also drbd_io_error().
* See also drbd_main.c:after_state_ch() if (os.disk > D_FAILED && ns.disk == D_FAILED)
*/
static inline void drbd_chk_io_error(struct drbd_conf *mdev,
int error, int forcedetach)
......
......@@ -416,61 +416,6 @@ void tl_clear(struct drbd_conf *mdev)
spin_unlock_irq(&mdev->req_lock);
}
/**
* drbd_io_error() - Detach from the local disk of so configured with the on_io_error setting
* @mdev: DRBD device.
* @force_detach: Detach no matter how on_io_error is set (meta data IO error)
*
* Should be called in the unlikely(!drbd_bio_uptodate(e->bio)) case from
* kernel thread context. See also drbd_chk_io_error().
*/
int drbd_io_error(struct drbd_conf *mdev, int force_detach)
{
enum drbd_io_error_p eh;
unsigned long flags;
int send;
int ok = 1;
eh = EP_PASS_ON;
if (get_ldev_if_state(mdev, D_FAILED)) {
eh = mdev->ldev->dc.on_io_error;
put_ldev(mdev);
}
if (!force_detach && eh == EP_PASS_ON)
return 1;
spin_lock_irqsave(&mdev->req_lock, flags);
send = (mdev->state.disk == D_FAILED);
if (send)
_drbd_set_state(_NS(mdev, disk, D_DISKLESS), CS_HARD, NULL);
spin_unlock_irqrestore(&mdev->req_lock, flags);
if (!send)
return ok;
if (mdev->state.conn >= C_CONNECTED) {
ok = drbd_send_state(mdev);
if (ok)
dev_warn(DEV, "Notified peer that my disk is broken.\n");
else
dev_err(DEV, "Sending state in drbd_io_error() failed\n");
}
/* Make sure we try to flush meta-data to disk - we come
* in here because of a local disk error so it might fail
* but we still need to try -- both because the error might
* be in the data portion of the disk and because we need
* to ensure the md-sync-timer is stopped if running. */
drbd_md_sync(mdev);
/* Releasing the backing device is done in after_state_ch() */
if (eh == EP_CALL_HELPER)
drbd_khelper(mdev, "local-io-error");
return ok;
}
/**
* cl_wide_st_chg() - TRUE if the state change is a cluster wide one
......@@ -1332,17 +1277,41 @@ static void after_state_ch(struct drbd_conf *mdev, union drbd_state os,
os.disk > D_INCONSISTENT && ns.disk == D_INCONSISTENT)
drbd_queue_bitmap_io(mdev, &drbd_bmio_set_n_write, NULL, "set_n_write from invalidate");
if (os.disk > D_DISKLESS && ns.disk == D_DISKLESS) {
if (os.disk > D_FAILED && ns.disk == D_FAILED) {
enum drbd_io_error_p eh;
eh = EP_PASS_ON;
if (get_ldev_if_state(mdev, D_FAILED)) {
eh = mdev->ldev->dc.on_io_error;
put_ldev(mdev);
}
drbd_rs_cancel_all(mdev);
/* since get_ldev() only works as long as disk>=D_INCONSISTENT,
and it is D_DISKLESS here, local_cnt can only go down, it can
not increase... It will reach zero */
wait_event(mdev->misc_wait, !atomic_read(&mdev->local_cnt));
drbd_rs_cancel_all(mdev);
mdev->rs_total = 0;
mdev->rs_failed = 0;
atomic_set(&mdev->rs_pending_cnt, 0);
spin_lock_irq(&mdev->req_lock);
_drbd_set_state(_NS(mdev, disk, D_DISKLESS), CS_HARD, NULL);
spin_unlock_irq(&mdev->req_lock);
if (eh == EP_CALL_HELPER)
drbd_khelper(mdev, "local-io-error");
}
if (os.disk > D_DISKLESS && ns.disk == D_DISKLESS) {
if (os.disk == D_FAILED) /* && ns.disk == D_DISKLESS*/ {
if (drbd_send_state(mdev))
dev_warn(DEV, "Notified peer that my disk is broken.\n");
else
dev_err(DEV, "Sending state in drbd_io_error() failed\n");
}
lc_destroy(mdev->resync);
mdev->resync = NULL;
lc_destroy(mdev->act_log);
......@@ -3305,7 +3274,6 @@ void drbd_md_sync(struct drbd_conf *mdev)
dev_err(DEV, "meta data update failed!\n");
drbd_chk_io_error(mdev, 1, TRUE);
drbd_io_error(mdev, TRUE);
}
/* Update mdev->ldev->md.la_size_sect,
......
......@@ -1465,7 +1465,6 @@ static int e_end_resync_block(struct drbd_conf *mdev, struct drbd_work *w, int u
drbd_rs_failed_io(mdev, sector, e->size);
ok = drbd_send_ack(mdev, P_NEG_ACK, e);
ok &= drbd_io_error(mdev, FALSE);
}
dec_unacked(mdev);
......@@ -1607,15 +1606,11 @@ static int e_end_block(struct drbd_conf *mdev, struct drbd_work *w, int unused)
drbd_set_in_sync(mdev, sector, e->size);
} else {
ok = drbd_send_ack(mdev, P_NEG_ACK, e);
ok &= drbd_io_error(mdev, FALSE);
/* we expect it to be marked out of sync anyways...
* maybe assert this? */
}
dec_unacked(mdev);
} else if (unlikely(!drbd_bio_uptodate(e->private_bio))) {
ok = drbd_io_error(mdev, FALSE);
}
/* we delete from the conflict detection hash _after_ we sent out the
* P_WRITE_ACK / P_NEG_ACK, to get the sequence number right. */
if (mdev->net_conf->two_primaries) {
......
......@@ -257,7 +257,6 @@ void drbd_endio_pri(struct bio *bio, int error)
int w_io_error(struct drbd_conf *mdev, struct drbd_work *w, int cancel)
{
struct drbd_request *req = container_of(w, struct drbd_request, w);
int ok;
/* NOTE: mdev->ldev can be NULL by the time we get here! */
/* D_ASSERT(mdev->ldev->dc.on_io_error != EP_PASS_ON); */
......@@ -266,10 +265,7 @@ int w_io_error(struct drbd_conf *mdev, struct drbd_work *w, int cancel)
* when it is done and had a local write error, see comments there */
drbd_req_free(req);
ok = drbd_io_error(mdev, FALSE);
if (unlikely(!ok))
dev_err(DEV, "Sending in w_io_error() failed\n");
return ok;
return TRUE;
}
int w_read_retry_remote(struct drbd_conf *mdev, struct drbd_work *w, int cancel)
......@@ -279,7 +275,6 @@ int w_read_retry_remote(struct drbd_conf *mdev, struct drbd_work *w, int cancel)
/* We should not detach for read io-error,
* but try to WRITE the P_DATA_REPLY to the failed location,
* to give the disk the chance to relocate that block */
drbd_io_error(mdev, FALSE); /* tries to schedule a detach and notifies peer */
spin_lock_irq(&mdev->req_lock);
if (cancel ||
......@@ -354,10 +349,8 @@ static int w_e_send_csum(struct drbd_conf *mdev, struct drbd_work *w, int cancel
dev_err(DEV, "kmalloc() of digest failed.\n");
ok = 0;
}
} else {
drbd_io_error(mdev, FALSE);
} else
ok = 1;
}
drbd_free_ee(mdev, e);
......@@ -849,8 +842,6 @@ int w_e_end_data_req(struct drbd_conf *mdev, struct drbd_work *w, int cancel)
(unsigned long long)e->sector);
ok = drbd_send_ack(mdev, P_NEG_DREPLY, e);
drbd_io_error(mdev, FALSE);
}
dec_unacked(mdev);
......@@ -901,8 +892,6 @@ int w_e_end_rsdata_req(struct drbd_conf *mdev, struct drbd_work *w, int cancel)
ok = drbd_send_ack(mdev, P_NEG_RS_DREPLY, e);
drbd_io_error(mdev, FALSE);
/* update resync data with failure */
drbd_rs_failed_io(mdev, e->sector, e->size);
}
......@@ -962,7 +951,6 @@ int w_e_end_csum_rs_req(struct drbd_conf *mdev, struct drbd_work *w, int cancel)
ok = drbd_send_ack(mdev, P_NEG_RS_DREPLY, e);
if (__ratelimit(&drbd_ratelimit_state))
dev_err(DEV, "Sending NegDReply. I guess it gets messy.\n");
drbd_io_error(mdev, FALSE);
}
dec_unacked(mdev);
......@@ -1056,7 +1044,6 @@ int w_e_end_ov_reply(struct drbd_conf *mdev, struct drbd_work *w, int cancel)
ok = drbd_send_ack(mdev, P_NEG_RS_DREPLY, e);
if (__ratelimit(&drbd_ratelimit_state))
dev_err(DEV, "Sending NegDReply. I guess it gets messy.\n");
drbd_io_error(mdev, FALSE);
}
dec_unacked(mdev);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment