Commit 6fa6f5bb authored by Artem Bityutskiy's avatar Artem Bityutskiy

UBI: handle write errors in WL worker

When a PEB is moved and a write error happens, UBI switches
to R/O mode, which is wrong, because we just copy the data
and may select a different PEB and re-try this. This patch
fixes WL worker's behavior.
Signed-off-by: default avatarArtem Bityutskiy <Artem.Bityutskiy@nokia.com>
parent 3c98b0a0
...@@ -950,9 +950,13 @@ write_error: ...@@ -950,9 +950,13 @@ write_error:
* physical eraseblock @to. The @vid_hdr buffer may be changed by this * physical eraseblock @to. The @vid_hdr buffer may be changed by this
* function. Returns: * function. Returns:
* o %0 in case of success; * o %0 in case of success;
* o %1 if the operation was canceled and should be tried later (e.g., * o %1 if the operation was canceled because the volume is being deleted
* because a bit-flip was detected at the target PEB); * or because the PEB was put meanwhile;
* o %2 if the volume is being deleted and this LEB should not be moved. * o %2 if the operation was canceled because there was a write error to the
* target PEB;
* o %-EAGAIN if the operation was canceled because a bit-flip was detected
* in the target PEB;
* o a negative error code in case of failure.
*/ */
int ubi_eba_copy_leb(struct ubi_device *ubi, int from, int to, int ubi_eba_copy_leb(struct ubi_device *ubi, int from, int to,
struct ubi_vid_hdr *vid_hdr) struct ubi_vid_hdr *vid_hdr)
...@@ -978,7 +982,7 @@ int ubi_eba_copy_leb(struct ubi_device *ubi, int from, int to, ...@@ -978,7 +982,7 @@ int ubi_eba_copy_leb(struct ubi_device *ubi, int from, int to,
/* /*
* Note, we may race with volume deletion, which means that the volume * Note, we may race with volume deletion, which means that the volume
* this logical eraseblock belongs to might be being deleted. Since the * this logical eraseblock belongs to might be being deleted. Since the
* volume deletion unmaps all the volume's logical eraseblocks, it will * volume deletion un-maps all the volume's logical eraseblocks, it will
* be locked in 'ubi_wl_put_peb()' and wait for the WL worker to finish. * be locked in 'ubi_wl_put_peb()' and wait for the WL worker to finish.
*/ */
vol = ubi->volumes[idx]; vol = ubi->volumes[idx];
...@@ -986,7 +990,7 @@ int ubi_eba_copy_leb(struct ubi_device *ubi, int from, int to, ...@@ -986,7 +990,7 @@ int ubi_eba_copy_leb(struct ubi_device *ubi, int from, int to,
/* No need to do further work, cancel */ /* No need to do further work, cancel */
dbg_eba("volume %d is being removed, cancel", vol_id); dbg_eba("volume %d is being removed, cancel", vol_id);
spin_unlock(&ubi->volumes_lock); spin_unlock(&ubi->volumes_lock);
return 2; return 1;
} }
spin_unlock(&ubi->volumes_lock); spin_unlock(&ubi->volumes_lock);
...@@ -1023,7 +1027,7 @@ int ubi_eba_copy_leb(struct ubi_device *ubi, int from, int to, ...@@ -1023,7 +1027,7 @@ int ubi_eba_copy_leb(struct ubi_device *ubi, int from, int to,
/* /*
* OK, now the LEB is locked and we can safely start moving it. Since * OK, now the LEB is locked and we can safely start moving it. Since
* this function utilizes thie @ubi->peb1_buf buffer which is shared * this function utilizes the @ubi->peb1_buf buffer which is shared
* with some other functions, so lock the buffer by taking the * with some other functions, so lock the buffer by taking the
* @ubi->buf_mutex. * @ubi->buf_mutex.
*/ */
...@@ -1068,8 +1072,11 @@ int ubi_eba_copy_leb(struct ubi_device *ubi, int from, int to, ...@@ -1068,8 +1072,11 @@ int ubi_eba_copy_leb(struct ubi_device *ubi, int from, int to,
vid_hdr->sqnum = cpu_to_be64(next_sqnum(ubi)); vid_hdr->sqnum = cpu_to_be64(next_sqnum(ubi));
err = ubi_io_write_vid_hdr(ubi, to, vid_hdr); err = ubi_io_write_vid_hdr(ubi, to, vid_hdr);
if (err) if (err) {
if (err == -EIO)
err = 2;
goto out_unlock_buf; goto out_unlock_buf;
}
cond_resched(); cond_resched();
...@@ -1079,14 +1086,17 @@ int ubi_eba_copy_leb(struct ubi_device *ubi, int from, int to, ...@@ -1079,14 +1086,17 @@ int ubi_eba_copy_leb(struct ubi_device *ubi, int from, int to,
if (err != UBI_IO_BITFLIPS) if (err != UBI_IO_BITFLIPS)
ubi_warn("cannot read VID header back from PEB %d", to); ubi_warn("cannot read VID header back from PEB %d", to);
else else
err = 1; err = -EAGAIN;
goto out_unlock_buf; goto out_unlock_buf;
} }
if (data_size > 0) { if (data_size > 0) {
err = ubi_io_write_data(ubi, ubi->peb_buf1, to, 0, aldata_size); err = ubi_io_write_data(ubi, ubi->peb_buf1, to, 0, aldata_size);
if (err) if (err) {
if (err == -EIO)
err = 2;
goto out_unlock_buf; goto out_unlock_buf;
}
cond_resched(); cond_resched();
...@@ -1101,15 +1111,16 @@ int ubi_eba_copy_leb(struct ubi_device *ubi, int from, int to, ...@@ -1101,15 +1111,16 @@ int ubi_eba_copy_leb(struct ubi_device *ubi, int from, int to,
ubi_warn("cannot read data back from PEB %d", ubi_warn("cannot read data back from PEB %d",
to); to);
else else
err = 1; err = -EAGAIN;
goto out_unlock_buf; goto out_unlock_buf;
} }
cond_resched(); cond_resched();
if (memcmp(ubi->peb_buf1, ubi->peb_buf2, aldata_size)) { if (memcmp(ubi->peb_buf1, ubi->peb_buf2, aldata_size)) {
ubi_warn("read data back from PEB %d - it is different", ubi_warn("read data back from PEB %d and it is "
to); "different", to);
err = -EINVAL;
goto out_unlock_buf; goto out_unlock_buf;
} }
} }
......
...@@ -738,7 +738,7 @@ static int schedule_erase(struct ubi_device *ubi, struct ubi_wl_entry *e, ...@@ -738,7 +738,7 @@ static int schedule_erase(struct ubi_device *ubi, struct ubi_wl_entry *e,
static int wear_leveling_worker(struct ubi_device *ubi, struct ubi_work *wrk, static int wear_leveling_worker(struct ubi_device *ubi, struct ubi_work *wrk,
int cancel) int cancel)
{ {
int err, scrubbing = 0; int err, scrubbing = 0, torture = 0;
struct ubi_wl_prot_entry *uninitialized_var(pe); struct ubi_wl_prot_entry *uninitialized_var(pe);
struct ubi_wl_entry *e1, *e2; struct ubi_wl_entry *e1, *e2;
struct ubi_vid_hdr *vid_hdr; struct ubi_vid_hdr *vid_hdr;
...@@ -842,20 +842,26 @@ static int wear_leveling_worker(struct ubi_device *ubi, struct ubi_work *wrk, ...@@ -842,20 +842,26 @@ static int wear_leveling_worker(struct ubi_device *ubi, struct ubi_work *wrk,
err = ubi_eba_copy_leb(ubi, e1->pnum, e2->pnum, vid_hdr); err = ubi_eba_copy_leb(ubi, e1->pnum, e2->pnum, vid_hdr);
if (err) { if (err) {
if (err == -EAGAIN)
goto out_not_moved;
if (err < 0) if (err < 0)
goto out_error; goto out_error;
if (err == 1) if (err == 2) {
/* Target PEB write error, torture it */
torture = 1;
goto out_not_moved; goto out_not_moved;
}
/* /*
* For some reason the LEB was not moved - it might be because * The LEB has not been moved because the volume is being
* the volume is being deleted. We should prevent this PEB from * deleted or the PEB has been put meanwhile. We should prevent
* being selected for wear-levelling movement for some "time", * this PEB from being selected for wear-leveling movement
* so put it to the protection tree. * again, so put it to the protection tree.
*/ */
dbg_wl("cancelled moving PEB %d", e1->pnum); dbg_wl("canceled moving PEB %d", e1->pnum);
ubi_assert(err == 1);
pe = kmalloc(sizeof(struct ubi_wl_prot_entry), GFP_NOFS); pe = kmalloc(sizeof(struct ubi_wl_prot_entry), GFP_NOFS);
if (!pe) { if (!pe) {
err = -ENOMEM; err = -ENOMEM;
...@@ -920,9 +926,10 @@ static int wear_leveling_worker(struct ubi_device *ubi, struct ubi_work *wrk, ...@@ -920,9 +926,10 @@ static int wear_leveling_worker(struct ubi_device *ubi, struct ubi_work *wrk,
/* /*
* For some reasons the LEB was not moved, might be an error, might be * For some reasons the LEB was not moved, might be an error, might be
* something else. @e1 was not changed, so return it back. @e2 might * something else. @e1 was not changed, so return it back. @e2 might
* be changed, schedule it for erasure. * have been changed, schedule it for erasure.
*/ */
out_not_moved: out_not_moved:
dbg_wl("canceled moving PEB %d", e1->pnum);
ubi_free_vid_hdr(ubi, vid_hdr); ubi_free_vid_hdr(ubi, vid_hdr);
vid_hdr = NULL; vid_hdr = NULL;
spin_lock(&ubi->wl_lock); spin_lock(&ubi->wl_lock);
...@@ -930,12 +937,13 @@ out_not_moved: ...@@ -930,12 +937,13 @@ out_not_moved:
wl_tree_add(e1, &ubi->scrub); wl_tree_add(e1, &ubi->scrub);
else else
wl_tree_add(e1, &ubi->used); wl_tree_add(e1, &ubi->used);
ubi_assert(!ubi->move_to_put);
ubi->move_from = ubi->move_to = NULL; ubi->move_from = ubi->move_to = NULL;
ubi->move_to_put = ubi->wl_scheduled = 0; ubi->wl_scheduled = 0;
spin_unlock(&ubi->wl_lock); spin_unlock(&ubi->wl_lock);
e1 = NULL; e1 = NULL;
err = schedule_erase(ubi, e2, 0); err = schedule_erase(ubi, e2, torture);
if (err) if (err)
goto out_error; goto out_error;
...@@ -1324,7 +1332,7 @@ int ubi_wl_flush(struct ubi_device *ubi) ...@@ -1324,7 +1332,7 @@ int ubi_wl_flush(struct ubi_device *ubi)
up_write(&ubi->work_sem); up_write(&ubi->work_sem);
/* /*
* And in case last was the WL worker and it cancelled the LEB * And in case last was the WL worker and it canceled the LEB
* movement, flush again. * movement, flush again.
*/ */
while (ubi->works_count) { while (ubi->works_count) {
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment