Commit ab048fb1 authored by Linus Torvalds

Merge git://oss.sgi.com:8090/xfs/linux-2.6

* git://oss.sgi.com:8090/xfs/linux-2.6:
  [XFS] Don't do I/O beyond eof when unreserving space
  [XFS] Fix use-after-free with buffers
  [XFS] Prevent lockdep false positives when locking two inodes.
  [XFS] Fix barrier status change detection.
  [XFS] Prevent direct I/O from mapping extents beyond eof
  [XFS] Fix regression introduced by remount fixup
  [XFS] Move memory allocations for log tracing out of the critical path
parents 5a0cd4eb 2fd6f6ec
...@@ -1338,6 +1338,10 @@ __xfs_get_blocks( ...@@ -1338,6 +1338,10 @@ __xfs_get_blocks(
offset = (xfs_off_t)iblock << inode->i_blkbits; offset = (xfs_off_t)iblock << inode->i_blkbits;
ASSERT(bh_result->b_size >= (1 << inode->i_blkbits)); ASSERT(bh_result->b_size >= (1 << inode->i_blkbits));
size = bh_result->b_size; size = bh_result->b_size;
if (!create && direct && offset >= i_size_read(inode))
return 0;
error = xfs_iomap(XFS_I(inode), offset, size, error = xfs_iomap(XFS_I(inode), offset, size,
create ? flags : BMAPI_READ, &iomap, &niomap); create ? flags : BMAPI_READ, &iomap, &niomap);
if (error) if (error)
......
...@@ -1302,9 +1302,29 @@ xfs_fs_remount( ...@@ -1302,9 +1302,29 @@ xfs_fs_remount(
mp->m_flags &= ~XFS_MOUNT_BARRIER; mp->m_flags &= ~XFS_MOUNT_BARRIER;
break; break;
default: default:
/*
* Logically we would return an error here to prevent
* users from believing they might have changed
* mount options using remount which can't be changed.
*
* But unfortunately mount(8) adds all options from
* mtab and fstab to the mount arguments in some cases
* so we can't blindly reject options, but have to
* check for each specified option if it actually
* differs from the currently set option and only
* reject it if that's the case.
*
* Until that is implemented we return success for
* every remount request, and silently ignore all
* options that we can't actually change.
*/
#if 0
printk(KERN_INFO printk(KERN_INFO
"XFS: mount option \"%s\" not supported for remount\n", p); "XFS: mount option \"%s\" not supported for remount\n", p);
return -EINVAL; return -EINVAL;
#else
return 0;
#endif
} }
} }
......
...@@ -732,6 +732,7 @@ xfs_buf_item_init( ...@@ -732,6 +732,7 @@ xfs_buf_item_init(
bip->bli_item.li_ops = &xfs_buf_item_ops; bip->bli_item.li_ops = &xfs_buf_item_ops;
bip->bli_item.li_mountp = mp; bip->bli_item.li_mountp = mp;
bip->bli_buf = bp; bip->bli_buf = bp;
xfs_buf_hold(bp);
bip->bli_format.blf_type = XFS_LI_BUF; bip->bli_format.blf_type = XFS_LI_BUF;
bip->bli_format.blf_blkno = (__int64_t)XFS_BUF_ADDR(bp); bip->bli_format.blf_blkno = (__int64_t)XFS_BUF_ADDR(bp);
bip->bli_format.blf_len = (ushort)BTOBB(XFS_BUF_COUNT(bp)); bip->bli_format.blf_len = (ushort)BTOBB(XFS_BUF_COUNT(bp));
...@@ -867,6 +868,21 @@ xfs_buf_item_dirty( ...@@ -867,6 +868,21 @@ xfs_buf_item_dirty(
return (bip->bli_flags & XFS_BLI_DIRTY); return (bip->bli_flags & XFS_BLI_DIRTY);
} }
/*
 * Release the memory owned by a buf log item.
 *
 * When XFS_TRANS_DEBUG is defined, bli_orig and bli_logged are freed
 * first; when XFS_BLI_TRACE is defined, the item's trace buffer is
 * freed as well.  The item itself is then handed back to
 * xfs_buf_item_zone, so the caller must not touch bip afterwards.
 * Only the log item is handled here; nothing is done to the buffer
 * it describes.
 */
STATIC void
xfs_buf_item_free(
	xfs_buf_log_item_t	*bip)
{
#if defined(XFS_TRANS_DEBUG)
	kmem_free(bip->bli_orig);
	kmem_free(bip->bli_logged);
#endif	/* XFS_TRANS_DEBUG */

#if defined(XFS_BLI_TRACE)
	ktrace_free(bip->bli_trace);
#endif	/* XFS_BLI_TRACE */

	kmem_zone_free(xfs_buf_item_zone, bip);
}
/* /*
* This is called when the buf log item is no longer needed. It should * This is called when the buf log item is no longer needed. It should
* free the buf log item associated with the given buffer and clear * free the buf log item associated with the given buffer and clear
...@@ -887,18 +903,8 @@ xfs_buf_item_relse( ...@@ -887,18 +903,8 @@ xfs_buf_item_relse(
(XFS_BUF_IODONE_FUNC(bp) != NULL)) { (XFS_BUF_IODONE_FUNC(bp) != NULL)) {
XFS_BUF_CLR_IODONE_FUNC(bp); XFS_BUF_CLR_IODONE_FUNC(bp);
} }
xfs_buf_rele(bp);
#ifdef XFS_TRANS_DEBUG xfs_buf_item_free(bip);
kmem_free(bip->bli_orig);
bip->bli_orig = NULL;
kmem_free(bip->bli_logged);
bip->bli_logged = NULL;
#endif /* XFS_TRANS_DEBUG */
#ifdef XFS_BLI_TRACE
ktrace_free(bip->bli_trace);
#endif
kmem_zone_free(xfs_buf_item_zone, bip);
} }
...@@ -1120,6 +1126,7 @@ xfs_buf_iodone( ...@@ -1120,6 +1126,7 @@ xfs_buf_iodone(
ASSERT(bip->bli_buf == bp); ASSERT(bip->bli_buf == bp);
xfs_buf_rele(bp);
mp = bip->bli_item.li_mountp; mp = bip->bli_item.li_mountp;
/* /*
...@@ -1136,18 +1143,7 @@ xfs_buf_iodone( ...@@ -1136,18 +1143,7 @@ xfs_buf_iodone(
* xfs_trans_delete_ail() drops the AIL lock. * xfs_trans_delete_ail() drops the AIL lock.
*/ */
xfs_trans_delete_ail(mp, (xfs_log_item_t *)bip); xfs_trans_delete_ail(mp, (xfs_log_item_t *)bip);
xfs_buf_item_free(bip);
#ifdef XFS_TRANS_DEBUG
kmem_free(bip->bli_orig);
bip->bli_orig = NULL;
kmem_free(bip->bli_logged);
bip->bli_logged = NULL;
#endif /* XFS_TRANS_DEBUG */
#ifdef XFS_BLI_TRACE
ktrace_free(bip->bli_trace);
#endif
kmem_zone_free(xfs_buf_item_zone, bip);
} }
#if defined(XFS_BLI_TRACE) #if defined(XFS_BLI_TRACE)
......
...@@ -149,7 +149,14 @@ xfs_swap_extents( ...@@ -149,7 +149,14 @@ xfs_swap_extents(
sbp = &sxp->sx_stat; sbp = &sxp->sx_stat;
xfs_lock_two_inodes(ip, tip, lock_flags); /*
* we have to do two separate lock calls here to keep lockdep
* happy. If we try to get all the locks in one call, lockdep will
* report false positives when we drop the ILOCK and regain them
* below.
*/
xfs_lock_two_inodes(ip, tip, XFS_IOLOCK_EXCL);
xfs_lock_two_inodes(ip, tip, XFS_ILOCK_EXCL);
locked = 1; locked = 1;
/* Verify that both files have the same format */ /* Verify that both files have the same format */
......
...@@ -124,16 +124,27 @@ STATIC void xlog_verify_tail_lsn(xlog_t *log, xlog_in_core_t *iclog, ...@@ -124,16 +124,27 @@ STATIC void xlog_verify_tail_lsn(xlog_t *log, xlog_in_core_t *iclog,
STATIC int xlog_iclogs_empty(xlog_t *log); STATIC int xlog_iclogs_empty(xlog_t *log);
#if defined(XFS_LOG_TRACE) #if defined(XFS_LOG_TRACE)
#define XLOG_TRACE_LOGGRANT_SIZE 2048
#define XLOG_TRACE_ICLOG_SIZE 256
/*
 * Set up the grant trace buffer for a log.
 *
 * Calls ktrace_alloc() with XLOG_TRACE_LOGGRANT_SIZE and KM_NOFS and
 * stores whatever it returns in log->l_grant_trace.  The result is
 * not checked here.
 */
void
xlog_trace_loggrant_alloc(xlog_t *log)
{
	struct ktrace	*grant_trace;

	grant_trace = ktrace_alloc(XLOG_TRACE_LOGGRANT_SIZE, KM_NOFS);
	log->l_grant_trace = grant_trace;
}
/*
 * Tear down the grant trace buffer of a log by passing
 * log->l_grant_trace to ktrace_free().  The pointer stored in the
 * log is left as it was.
 */
void
xlog_trace_loggrant_dealloc(xlog_t *log)
{
	struct ktrace	*grant_trace = log->l_grant_trace;

	ktrace_free(grant_trace);
}
void void
xlog_trace_loggrant(xlog_t *log, xlog_ticket_t *tic, xfs_caddr_t string) xlog_trace_loggrant(xlog_t *log, xlog_ticket_t *tic, xfs_caddr_t string)
{ {
unsigned long cnts; unsigned long cnts;
if (!log->l_grant_trace) {
log->l_grant_trace = ktrace_alloc(2048, KM_NOSLEEP);
if (!log->l_grant_trace)
return;
}
/* ticket counts are 1 byte each */ /* ticket counts are 1 byte each */
cnts = ((unsigned long)tic->t_ocnt) | ((unsigned long)tic->t_cnt) << 8; cnts = ((unsigned long)tic->t_ocnt) | ((unsigned long)tic->t_cnt) << 8;
...@@ -156,11 +167,21 @@ xlog_trace_loggrant(xlog_t *log, xlog_ticket_t *tic, xfs_caddr_t string) ...@@ -156,11 +167,21 @@ xlog_trace_loggrant(xlog_t *log, xlog_ticket_t *tic, xfs_caddr_t string)
(void *)((unsigned long)tic->t_unit_res)); (void *)((unsigned long)tic->t_unit_res));
} }
/*
 * Set up the trace buffer for an in-core log: call ktrace_alloc()
 * with XLOG_TRACE_ICLOG_SIZE and KM_NOFS and store the result in
 * iclog->ic_trace.  The result is not checked here.
 */
void
xlog_trace_iclog_alloc(xlog_in_core_t *iclog)
{
iclog->ic_trace = ktrace_alloc(XLOG_TRACE_ICLOG_SIZE, KM_NOFS);
}
/*
 * Tear down the trace buffer of an in-core log by passing
 * iclog->ic_trace to ktrace_free().  The pointer stored in the
 * iclog is not cleared.
 */
void
xlog_trace_iclog_dealloc(xlog_in_core_t *iclog)
{
ktrace_free(iclog->ic_trace);
}
void void
xlog_trace_iclog(xlog_in_core_t *iclog, uint state) xlog_trace_iclog(xlog_in_core_t *iclog, uint state)
{ {
if (!iclog->ic_trace)
iclog->ic_trace = ktrace_alloc(256, KM_NOFS);
ktrace_enter(iclog->ic_trace, ktrace_enter(iclog->ic_trace,
(void *)((unsigned long)state), (void *)((unsigned long)state),
(void *)((unsigned long)current_pid()), (void *)((unsigned long)current_pid()),
...@@ -170,8 +191,15 @@ xlog_trace_iclog(xlog_in_core_t *iclog, uint state) ...@@ -170,8 +191,15 @@ xlog_trace_iclog(xlog_in_core_t *iclog, uint state)
(void *)NULL, (void *)NULL); (void *)NULL, (void *)NULL);
} }
#else #else
#define xlog_trace_loggrant_alloc(log)
#define xlog_trace_loggrant_dealloc(log)
#define xlog_trace_loggrant(log,tic,string) #define xlog_trace_loggrant(log,tic,string)
#define xlog_trace_iclog_alloc(iclog)
#define xlog_trace_iclog_dealloc(iclog)
#define xlog_trace_iclog(iclog,state) #define xlog_trace_iclog(iclog,state)
#endif /* XFS_LOG_TRACE */ #endif /* XFS_LOG_TRACE */
...@@ -1009,7 +1037,7 @@ xlog_iodone(xfs_buf_t *bp) ...@@ -1009,7 +1037,7 @@ xlog_iodone(xfs_buf_t *bp)
* layer, it means the underlying device no longer supports * layer, it means the underlying device no longer supports
* barrier I/O. Warn loudly and turn off barriers. * barrier I/O. Warn loudly and turn off barriers.
*/ */
if ((l->l_mp->m_flags & XFS_MOUNT_BARRIER) && !XFS_BUF_ORDERED(bp)) { if ((l->l_mp->m_flags & XFS_MOUNT_BARRIER) && !XFS_BUF_ISORDERED(bp)) {
l->l_mp->m_flags &= ~XFS_MOUNT_BARRIER; l->l_mp->m_flags &= ~XFS_MOUNT_BARRIER;
xfs_fs_cmn_err(CE_WARN, l->l_mp, xfs_fs_cmn_err(CE_WARN, l->l_mp,
"xlog_iodone: Barriers are no longer supported" "xlog_iodone: Barriers are no longer supported"
...@@ -1231,6 +1259,7 @@ xlog_alloc_log(xfs_mount_t *mp, ...@@ -1231,6 +1259,7 @@ xlog_alloc_log(xfs_mount_t *mp,
spin_lock_init(&log->l_grant_lock); spin_lock_init(&log->l_grant_lock);
sv_init(&log->l_flush_wait, 0, "flush_wait"); sv_init(&log->l_flush_wait, 0, "flush_wait");
xlog_trace_loggrant_alloc(log);
/* log record size must be multiple of BBSIZE; see xlog_rec_header_t */ /* log record size must be multiple of BBSIZE; see xlog_rec_header_t */
ASSERT((XFS_BUF_SIZE(bp) & BBMASK) == 0); ASSERT((XFS_BUF_SIZE(bp) & BBMASK) == 0);
...@@ -1285,6 +1314,8 @@ xlog_alloc_log(xfs_mount_t *mp, ...@@ -1285,6 +1314,8 @@ xlog_alloc_log(xfs_mount_t *mp,
sv_init(&iclog->ic_force_wait, SV_DEFAULT, "iclog-force"); sv_init(&iclog->ic_force_wait, SV_DEFAULT, "iclog-force");
sv_init(&iclog->ic_write_wait, SV_DEFAULT, "iclog-write"); sv_init(&iclog->ic_write_wait, SV_DEFAULT, "iclog-write");
xlog_trace_iclog_alloc(iclog);
iclogp = &iclog->ic_next; iclogp = &iclog->ic_next;
} }
*iclogp = log->l_iclog; /* complete ring */ *iclogp = log->l_iclog; /* complete ring */
...@@ -1565,11 +1596,7 @@ xlog_dealloc_log(xlog_t *log) ...@@ -1565,11 +1596,7 @@ xlog_dealloc_log(xlog_t *log)
sv_destroy(&iclog->ic_force_wait); sv_destroy(&iclog->ic_force_wait);
sv_destroy(&iclog->ic_write_wait); sv_destroy(&iclog->ic_write_wait);
xfs_buf_free(iclog->ic_bp); xfs_buf_free(iclog->ic_bp);
#ifdef XFS_LOG_TRACE xlog_trace_iclog_dealloc(iclog);
if (iclog->ic_trace != NULL) {
ktrace_free(iclog->ic_trace);
}
#endif
next_iclog = iclog->ic_next; next_iclog = iclog->ic_next;
kmem_free(iclog); kmem_free(iclog);
iclog = next_iclog; iclog = next_iclog;
...@@ -1578,14 +1605,7 @@ xlog_dealloc_log(xlog_t *log) ...@@ -1578,14 +1605,7 @@ xlog_dealloc_log(xlog_t *log)
spinlock_destroy(&log->l_grant_lock); spinlock_destroy(&log->l_grant_lock);
xfs_buf_free(log->l_xbuf); xfs_buf_free(log->l_xbuf);
#ifdef XFS_LOG_TRACE xlog_trace_loggrant_dealloc(log);
if (log->l_trace != NULL) {
ktrace_free(log->l_trace);
}
if (log->l_grant_trace != NULL) {
ktrace_free(log->l_grant_trace);
}
#endif
log->l_mp->m_log = NULL; log->l_mp->m_log = NULL;
kmem_free(log); kmem_free(log);
} /* xlog_dealloc_log */ } /* xlog_dealloc_log */
......
...@@ -448,7 +448,6 @@ typedef struct log { ...@@ -448,7 +448,6 @@ typedef struct log {
int l_grant_write_bytes; int l_grant_write_bytes;
#ifdef XFS_LOG_TRACE #ifdef XFS_LOG_TRACE
struct ktrace *l_trace;
struct ktrace *l_grant_trace; struct ktrace *l_grant_trace;
#endif #endif
......
...@@ -1838,6 +1838,12 @@ again: ...@@ -1838,6 +1838,12 @@ again:
#endif #endif
} }
/*
* xfs_lock_two_inodes() can only be used to lock one type of lock
* at a time - the iolock or the ilock, but not both at once. If
* we lock both at once, lockdep will report false positives saying
* we have violated locking orders.
*/
void void
xfs_lock_two_inodes( xfs_lock_two_inodes(
xfs_inode_t *ip0, xfs_inode_t *ip0,
...@@ -1848,6 +1854,8 @@ xfs_lock_two_inodes( ...@@ -1848,6 +1854,8 @@ xfs_lock_two_inodes(
int attempts = 0; int attempts = 0;
xfs_log_item_t *lp; xfs_log_item_t *lp;
if (lock_mode & (XFS_IOLOCK_SHARED|XFS_IOLOCK_EXCL))
ASSERT((lock_mode & (XFS_ILOCK_SHARED|XFS_ILOCK_EXCL)) == 0);
ASSERT(ip0->i_ino != ip1->i_ino); ASSERT(ip0->i_ino != ip1->i_ino);
if (ip0->i_ino > ip1->i_ino) { if (ip0->i_ino > ip1->i_ino) {
...@@ -3152,6 +3160,13 @@ error1: /* Just cancel transaction */ ...@@ -3152,6 +3160,13 @@ error1: /* Just cancel transaction */
/* /*
* Zero file bytes between startoff and endoff inclusive. * Zero file bytes between startoff and endoff inclusive.
* The iolock is held exclusive and no blocks are buffered. * The iolock is held exclusive and no blocks are buffered.
*
* This function is used by xfs_free_file_space() to zero
* partial blocks when the range to free is not block aligned.
* When unreserving space with boundaries that are not block
* aligned we round up the start and round down the end
* boundaries and then use this function to zero the parts of
* the blocks that got dropped during the rounding.
*/ */
STATIC int STATIC int
xfs_zero_remaining_bytes( xfs_zero_remaining_bytes(
...@@ -3168,6 +3183,17 @@ xfs_zero_remaining_bytes( ...@@ -3168,6 +3183,17 @@ xfs_zero_remaining_bytes(
int nimap; int nimap;
int error = 0; int error = 0;
/*
* Avoid doing I/O beyond eof - it's not necessary
* since nothing can read beyond eof. The space will
* be zeroed when the file is extended anyway.
*/
if (startoff >= ip->i_size)
return 0;
if (endoff > ip->i_size)
endoff = ip->i_size;
bp = xfs_buf_get_noaddr(mp->m_sb.sb_blocksize, bp = xfs_buf_get_noaddr(mp->m_sb.sb_blocksize,
XFS_IS_REALTIME_INODE(ip) ? XFS_IS_REALTIME_INODE(ip) ?
mp->m_rtdev_targp : mp->m_ddev_targp); mp->m_rtdev_targp : mp->m_ddev_targp);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment