Commit af84b99f authored by Linus Torvalds

Merge git://oss.sgi.com:8090/xfs/xfs-2.6

* git://oss.sgi.com:8090/xfs/xfs-2.6:
  [XFS] Fix a bad pointer dereference in the quota statvfs handling.
  [XFS] Fix xfs_splice_write() so appended data gets to disk.
  [XFS] Fix ABBA deadlock between i_mutex and iolock. Avoid calling
  [XFS] Prevent free space oversubscription and xfssyncd looping.
parents 05ff0e29 0edc7d0f
...@@ -1390,11 +1390,19 @@ xfs_vm_direct_IO( ...@@ -1390,11 +1390,19 @@ xfs_vm_direct_IO(
iocb->private = xfs_alloc_ioend(inode, IOMAP_UNWRITTEN); iocb->private = xfs_alloc_ioend(inode, IOMAP_UNWRITTEN);
ret = blockdev_direct_IO_own_locking(rw, iocb, inode, if (rw == WRITE) {
iomap.iomap_target->bt_bdev, ret = blockdev_direct_IO_own_locking(rw, iocb, inode,
iov, offset, nr_segs, iomap.iomap_target->bt_bdev,
xfs_get_blocks_direct, iov, offset, nr_segs,
xfs_end_io_direct); xfs_get_blocks_direct,
xfs_end_io_direct);
} else {
ret = blockdev_direct_IO_no_locking(rw, iocb, inode,
iomap.iomap_target->bt_bdev,
iov, offset, nr_segs,
xfs_get_blocks_direct,
xfs_end_io_direct);
}
if (unlikely(ret <= 0 && iocb->private)) if (unlikely(ret <= 0 && iocb->private))
xfs_destroy_ioend(iocb->private); xfs_destroy_ioend(iocb->private);
......
...@@ -264,7 +264,9 @@ xfs_read( ...@@ -264,7 +264,9 @@ xfs_read(
dmflags, &locktype); dmflags, &locktype);
if (ret) { if (ret) {
xfs_iunlock(ip, XFS_IOLOCK_SHARED); xfs_iunlock(ip, XFS_IOLOCK_SHARED);
goto unlock_mutex; if (unlikely(ioflags & IO_ISDIRECT))
mutex_unlock(&inode->i_mutex);
return ret;
} }
} }
...@@ -272,6 +274,9 @@ xfs_read( ...@@ -272,6 +274,9 @@ xfs_read(
bhv_vop_flushinval_pages(vp, ctooff(offtoct(*offset)), bhv_vop_flushinval_pages(vp, ctooff(offtoct(*offset)),
-1, FI_REMAPF_LOCKED); -1, FI_REMAPF_LOCKED);
if (unlikely(ioflags & IO_ISDIRECT))
mutex_unlock(&inode->i_mutex);
xfs_rw_enter_trace(XFS_READ_ENTER, &ip->i_iocore, xfs_rw_enter_trace(XFS_READ_ENTER, &ip->i_iocore,
(void *)iovp, segs, *offset, ioflags); (void *)iovp, segs, *offset, ioflags);
ret = __generic_file_aio_read(iocb, iovp, segs, offset); ret = __generic_file_aio_read(iocb, iovp, segs, offset);
...@@ -281,10 +286,6 @@ xfs_read( ...@@ -281,10 +286,6 @@ xfs_read(
XFS_STATS_ADD(xs_read_bytes, ret); XFS_STATS_ADD(xs_read_bytes, ret);
xfs_iunlock(ip, XFS_IOLOCK_SHARED); xfs_iunlock(ip, XFS_IOLOCK_SHARED);
unlock_mutex:
if (unlikely(ioflags & IO_ISDIRECT))
mutex_unlock(&inode->i_mutex);
return ret; return ret;
} }
...@@ -390,6 +391,8 @@ xfs_splice_write( ...@@ -390,6 +391,8 @@ xfs_splice_write(
xfs_inode_t *ip = XFS_BHVTOI(bdp); xfs_inode_t *ip = XFS_BHVTOI(bdp);
xfs_mount_t *mp = ip->i_mount; xfs_mount_t *mp = ip->i_mount;
ssize_t ret; ssize_t ret;
struct inode *inode = outfilp->f_mapping->host;
xfs_fsize_t isize;
XFS_STATS_INC(xs_write_calls); XFS_STATS_INC(xs_write_calls);
if (XFS_FORCED_SHUTDOWN(ip->i_mount)) if (XFS_FORCED_SHUTDOWN(ip->i_mount))
...@@ -416,6 +419,20 @@ xfs_splice_write( ...@@ -416,6 +419,20 @@ xfs_splice_write(
if (ret > 0) if (ret > 0)
XFS_STATS_ADD(xs_write_bytes, ret); XFS_STATS_ADD(xs_write_bytes, ret);
isize = i_size_read(inode);
if (unlikely(ret < 0 && ret != -EFAULT && *ppos > isize))
*ppos = isize;
if (*ppos > ip->i_d.di_size) {
xfs_ilock(ip, XFS_ILOCK_EXCL);
if (*ppos > ip->i_d.di_size) {
ip->i_d.di_size = *ppos;
i_size_write(inode, *ppos);
ip->i_update_core = 1;
ip->i_update_size = 1;
}
xfs_iunlock(ip, XFS_ILOCK_EXCL);
}
xfs_iunlock(ip, XFS_IOLOCK_EXCL); xfs_iunlock(ip, XFS_IOLOCK_EXCL);
return ret; return ret;
} }
......
...@@ -203,7 +203,7 @@ xfs_qm_statvfs( ...@@ -203,7 +203,7 @@ xfs_qm_statvfs(
if (error || !vnode) if (error || !vnode)
return error; return error;
mp = XFS_BHVTOM(bhv); mp = xfs_vfstom(bhvtovfs(bhv));
ip = xfs_vtoi(vnode); ip = xfs_vtoi(vnode);
if (!(ip->i_d.di_flags & XFS_DIFLAG_PROJINHERIT)) if (!(ip->i_d.di_flags & XFS_DIFLAG_PROJINHERIT))
......
...@@ -43,6 +43,26 @@ typedef enum xfs_alloctype ...@@ -43,6 +43,26 @@ typedef enum xfs_alloctype
#define XFS_ALLOC_FLAG_TRYLOCK 0x00000001 /* use trylock for buffer locking */ #define XFS_ALLOC_FLAG_TRYLOCK 0x00000001 /* use trylock for buffer locking */
#define XFS_ALLOC_FLAG_FREEING 0x00000002 /* indicate caller is freeing extents*/ #define XFS_ALLOC_FLAG_FREEING 0x00000002 /* indicate caller is freeing extents*/
/*
* XFS_ALLOC_SET_ASIDE(mp) is the number of filesystem blocks that are
* held back and never handed out to delayed allocation reservations.
* It evaluates to 4 + 4 * (mp)->m_sb.sb_agcount.
*
* In order to avoid ENOSPC-related deadlock caused by
* out-of-order locking of AGF buffer (PV 947395), we place
* constraints on the relationship among actual allocations for
* data blocks, freelist blocks, and potential file data bmap
* btree blocks. However, these restrictions may result in no
* actual space allocated for a delayed extent, for example, a data
* block in a certain AG is allocated but there is no additional
* block for the additional bmap btree block due to a split of the
* bmap btree of the file. The result of this may lead to an
* infinite loop in xfssyncd when the file gets flushed to disk and
* all delayed extents need to be actually allocated. To get around
* this, we explicitly set aside a few blocks which will not be
* reserved in delayed allocation.
*
* Sizing: the minimum number of needed freelist blocks is 4 fsbs
* _per AG_ (hence the 4 * agcount term), and a potential split of the
* file's bmap btree requires 1 fsb, so the number of set-aside blocks
* is 4 + 4*agcount.
*
* NOTE(review): the text above only accounts for 1 fsb for the bmap
* btree split, yet the fixed term is 4; the reason for the extra
* margin is not stated here - confirm before changing the constant.
*/
#define XFS_ALLOC_SET_ASIDE(mp) (4 + ((mp)->m_sb.sb_agcount * 4))
/* /*
* Argument structure for xfs_alloc routines. * Argument structure for xfs_alloc routines.
* This is turned into a structure to avoid having 20 arguments passed * This is turned into a structure to avoid having 20 arguments passed
......
...@@ -462,7 +462,7 @@ xfs_fs_counts( ...@@ -462,7 +462,7 @@ xfs_fs_counts(
xfs_icsb_sync_counters_lazy(mp); xfs_icsb_sync_counters_lazy(mp);
s = XFS_SB_LOCK(mp); s = XFS_SB_LOCK(mp);
cnt->freedata = mp->m_sb.sb_fdblocks; cnt->freedata = mp->m_sb.sb_fdblocks - XFS_ALLOC_SET_ASIDE(mp);
cnt->freertx = mp->m_sb.sb_frextents; cnt->freertx = mp->m_sb.sb_frextents;
cnt->freeino = mp->m_sb.sb_ifree; cnt->freeino = mp->m_sb.sb_ifree;
cnt->allocino = mp->m_sb.sb_icount; cnt->allocino = mp->m_sb.sb_icount;
...@@ -519,15 +519,19 @@ xfs_reserve_blocks( ...@@ -519,15 +519,19 @@ xfs_reserve_blocks(
} }
mp->m_resblks = request; mp->m_resblks = request;
} else { } else {
__int64_t free;
free = mp->m_sb.sb_fdblocks - XFS_ALLOC_SET_ASIDE(mp);
delta = request - mp->m_resblks; delta = request - mp->m_resblks;
lcounter = mp->m_sb.sb_fdblocks - delta; lcounter = free - delta;
if (lcounter < 0) { if (lcounter < 0) {
/* We can't satisfy the request, just get what we can */ /* We can't satisfy the request, just get what we can */
mp->m_resblks += mp->m_sb.sb_fdblocks; mp->m_resblks += free;
mp->m_resblks_avail += mp->m_sb.sb_fdblocks; mp->m_resblks_avail += free;
mp->m_sb.sb_fdblocks = 0; mp->m_sb.sb_fdblocks = XFS_ALLOC_SET_ASIDE(mp);
} else { } else {
mp->m_sb.sb_fdblocks = lcounter; mp->m_sb.sb_fdblocks =
lcounter + XFS_ALLOC_SET_ASIDE(mp);
mp->m_resblks = request; mp->m_resblks = request;
mp->m_resblks_avail += delta; mp->m_resblks_avail += delta;
} }
......
...@@ -1243,24 +1243,6 @@ xfs_mod_sb(xfs_trans_t *tp, __int64_t fields) ...@@ -1243,24 +1243,6 @@ xfs_mod_sb(xfs_trans_t *tp, __int64_t fields)
xfs_trans_log_buf(tp, bp, first, last); xfs_trans_log_buf(tp, bp, first, last);
} }
/*
* SET_ASIDE_BLOCKS is a fixed count of filesystem blocks (8) that are
* held back and never handed out to delayed allocation reservations.
*
* In order to avoid ENOSPC-related deadlock caused by
* out-of-order locking of AGF buffer (PV 947395), we place
* constraints on the relationship among actual allocations for
* data blocks, freelist blocks, and potential file data bmap
* btree blocks. However, these restrictions may result in no
* actual space allocated for a delayed extent, for example, a data
* block in a certain AG is allocated but there is no additional
* block for the additional bmap btree block due to a split of the
* bmap btree of the file. The result of this may lead to an
* infinite loop in xfssyncd when the file gets flushed to disk and
* all delayed extents need to be actually allocated. To get around
* this, we explicitly set aside a few blocks which will not be
* reserved in delayed allocation.
*
* Sizing: the minimum number of needed freelist blocks is 4 fsbs, and
* a potential split of the file's bmap btree requires 1 fsb, so we set
* the number of set-aside blocks to 8.
*
* NOTE(review): 4 + 1 is less than 8; the reason for the extra margin
* is not stated here - confirm before changing the constant.
*/
#define SET_ASIDE_BLOCKS 8
/* /*
* xfs_mod_incore_sb_unlocked() is a utility routine common used to apply * xfs_mod_incore_sb_unlocked() is a utility routine common used to apply
...@@ -1306,7 +1288,8 @@ xfs_mod_incore_sb_unlocked(xfs_mount_t *mp, xfs_sb_field_t field, ...@@ -1306,7 +1288,8 @@ xfs_mod_incore_sb_unlocked(xfs_mount_t *mp, xfs_sb_field_t field,
return 0; return 0;
case XFS_SBS_FDBLOCKS: case XFS_SBS_FDBLOCKS:
lcounter = (long long)mp->m_sb.sb_fdblocks - SET_ASIDE_BLOCKS; lcounter = (long long)
mp->m_sb.sb_fdblocks - XFS_ALLOC_SET_ASIDE(mp);
res_used = (long long)(mp->m_resblks - mp->m_resblks_avail); res_used = (long long)(mp->m_resblks - mp->m_resblks_avail);
if (delta > 0) { /* Putting blocks back */ if (delta > 0) { /* Putting blocks back */
...@@ -1340,7 +1323,7 @@ xfs_mod_incore_sb_unlocked(xfs_mount_t *mp, xfs_sb_field_t field, ...@@ -1340,7 +1323,7 @@ xfs_mod_incore_sb_unlocked(xfs_mount_t *mp, xfs_sb_field_t field,
} }
} }
mp->m_sb.sb_fdblocks = lcounter + SET_ASIDE_BLOCKS; mp->m_sb.sb_fdblocks = lcounter + XFS_ALLOC_SET_ASIDE(mp);
return 0; return 0;
case XFS_SBS_FREXTENTS: case XFS_SBS_FREXTENTS:
lcounter = (long long)mp->m_sb.sb_frextents; lcounter = (long long)mp->m_sb.sb_frextents;
...@@ -2021,7 +2004,8 @@ xfs_icsb_sync_counters_lazy( ...@@ -2021,7 +2004,8 @@ xfs_icsb_sync_counters_lazy(
* when we get near ENOSPC. * when we get near ENOSPC.
*/ */
#define XFS_ICSB_INO_CNTR_REENABLE 64 #define XFS_ICSB_INO_CNTR_REENABLE 64
#define XFS_ICSB_FDBLK_CNTR_REENABLE 512 #define XFS_ICSB_FDBLK_CNTR_REENABLE(mp) \
(512 + XFS_ALLOC_SET_ASIDE(mp))
STATIC void STATIC void
xfs_icsb_balance_counter( xfs_icsb_balance_counter(
xfs_mount_t *mp, xfs_mount_t *mp,
...@@ -2055,7 +2039,7 @@ xfs_icsb_balance_counter( ...@@ -2055,7 +2039,7 @@ xfs_icsb_balance_counter(
case XFS_SBS_FDBLOCKS: case XFS_SBS_FDBLOCKS:
count = mp->m_sb.sb_fdblocks; count = mp->m_sb.sb_fdblocks;
resid = do_div(count, weight); resid = do_div(count, weight);
if (count < XFS_ICSB_FDBLK_CNTR_REENABLE) if (count < XFS_ICSB_FDBLK_CNTR_REENABLE(mp))
goto out; goto out;
break; break;
default: default:
...@@ -2110,11 +2094,11 @@ again: ...@@ -2110,11 +2094,11 @@ again:
case XFS_SBS_FDBLOCKS: case XFS_SBS_FDBLOCKS:
BUG_ON((mp->m_resblks - mp->m_resblks_avail) != 0); BUG_ON((mp->m_resblks - mp->m_resblks_avail) != 0);
lcounter = icsbp->icsb_fdblocks; lcounter = icsbp->icsb_fdblocks - XFS_ALLOC_SET_ASIDE(mp);
lcounter += delta; lcounter += delta;
if (unlikely(lcounter < 0)) if (unlikely(lcounter < 0))
goto slow_path; goto slow_path;
icsbp->icsb_fdblocks = lcounter; icsbp->icsb_fdblocks = lcounter + XFS_ALLOC_SET_ASIDE(mp);
break; break;
default: default:
BUG(); BUG();
......
...@@ -811,7 +811,8 @@ xfs_statvfs( ...@@ -811,7 +811,8 @@ xfs_statvfs(
statp->f_bsize = sbp->sb_blocksize; statp->f_bsize = sbp->sb_blocksize;
lsize = sbp->sb_logstart ? sbp->sb_logblocks : 0; lsize = sbp->sb_logstart ? sbp->sb_logblocks : 0;
statp->f_blocks = sbp->sb_dblocks - lsize; statp->f_blocks = sbp->sb_dblocks - lsize;
statp->f_bfree = statp->f_bavail = sbp->sb_fdblocks; statp->f_bfree = statp->f_bavail =
sbp->sb_fdblocks - XFS_ALLOC_SET_ASIDE(mp);
fakeinos = statp->f_bfree << sbp->sb_inopblog; fakeinos = statp->f_bfree << sbp->sb_inopblog;
#if XFS_BIG_INUMS #if XFS_BIG_INUMS
fakeinos += mp->m_inoadd; fakeinos += mp->m_inoadd;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment