Commit ea6db58f authored by Linus Torvalds

Merge branch 'upstream-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mfasheh/ocfs2

* 'upstream-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mfasheh/ocfs2: (27 commits)
  ocfs2: Cache extent records
  ocfs2: Remember rw lock level during direct io
  ocfs2: Fix up i_blocks calculation to know about holes
  ocfs2: Fix extent lookup to return true size of holes
  ocfs2: Read from an unwritten extent returns zeros
  ocfs2: make room for unwritten extents flag
  ocfs2: Use own splice write actor
  ocfs2: Use do_sync_mapping_range() in ocfs2_zero_tail_for_truncate()
  [PATCH] Turn do_sync_file_range() into do_sync_mapping_range()
  ocfs2: zero tail of sparse files on truncate
  ocfs2: Teach ocfs2_get_block() about holes
  ocfs2: remove ocfs2_prepare_write() and ocfs2_commit_write()
  ocfs2: teach ocfs2_file_aio_write() about sparse files
  ocfs2: Turn off shared writeable mmap for local files systems with holes.
  ocfs2: abstract out allocation locking
  ocfs2: teach extend/truncate about sparse files
  ocfs2: temporarily remove extent map caching
  ocfs2: sparse b-tree support
  ocfs2: small cleanup of ocfs2_request_delete()
  ocfs2: remove unused code
  ...
parents c58b8e4a 83418978
@@ -31,7 +31,8 @@ int ocfs2_insert_extent(struct ocfs2_super *osb,
handle_t *handle,
struct inode *inode,
struct buffer_head *fe_bh,
u64 blkno,
u32 cpos,
u64 start_blk,
u32 new_clusters,
struct ocfs2_alloc_context *meta_ac);
int ocfs2_num_free_extents(struct ocfs2_super *osb,
@@ -70,6 +71,8 @@ struct ocfs2_truncate_context {
struct buffer_head *tc_last_eb_bh;
};
int ocfs2_zero_tail_for_truncate(struct inode *inode, handle_t *handle,
u64 new_i_size);
int ocfs2_prepare_truncate(struct ocfs2_super *osb,
struct inode *inode,
struct buffer_head *fe_bh,
@@ -79,4 +82,26 @@ int ocfs2_commit_truncate(struct ocfs2_super *osb,
struct buffer_head *fe_bh,
struct ocfs2_truncate_context *tc);
int ocfs2_find_leaf(struct inode *inode, struct ocfs2_extent_list *root_el,
u32 cpos, struct buffer_head **leaf_bh);
/*
* Helper function to look at the # of clusters in an extent record.
*/
static inline unsigned int ocfs2_rec_clusters(struct ocfs2_extent_list *el,
struct ocfs2_extent_rec *rec)
{
/*
* Cluster count in extent records is slightly different
* between interior nodes and leaf nodes. This is to support
* unwritten extents which need a flags field in leaf node
* records, thus shrinking the available space for a clusters
* field.
*/
if (el->l_tree_depth)
return le32_to_cpu(rec->e_int_clusters);
else
return le16_to_cpu(rec->e_leaf_clusters);
}
#endif /* OCFS2_ALLOC_H */
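As a hedged illustration (not part of this commit), a caller walking an extent list might use ocfs2_rec_clusters() so that the same loop handles both interior and leaf records; the function and variable names below are assumptions made for the sketch.

static int example_find_rec(struct ocfs2_extent_list *el, u32 v_cluster)
{
	int i;

	/* l_next_free_rec counts the records in use in this list. */
	for (i = 0; i < le16_to_cpu(el->l_next_free_rec); i++) {
		struct ocfs2_extent_rec *rec = &el->l_recs[i];
		u32 cpos = le32_to_cpu(rec->e_cpos);
		u32 clusters = ocfs2_rec_clusters(el, rec);

		if (v_cluster >= cpos && v_cluster < cpos + clusters)
			return i;	/* record i covers v_cluster */
	}

	return -1;	/* v_cluster falls in a hole or past the allocation */
}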
@@ -30,12 +30,83 @@ handle_t *ocfs2_start_walk_page_trans(struct inode *inode,
unsigned from,
unsigned to);
int ocfs2_map_page_blocks(struct page *page, u64 *p_blkno,
struct inode *inode, unsigned int from,
unsigned int to, int new);
int walk_page_buffers( handle_t *handle,
struct buffer_head *head,
unsigned from,
unsigned to,
int *partial,
int (*fn)( handle_t *handle,
struct buffer_head *bh));
struct ocfs2_write_ctxt;
typedef int (ocfs2_page_writer)(struct inode *, struct ocfs2_write_ctxt *,
u64 *, unsigned int *, unsigned int *);
ssize_t ocfs2_buffered_write_cluster(struct file *file, loff_t pos,
size_t count, ocfs2_page_writer *actor,
void *priv);
struct ocfs2_write_ctxt {
size_t w_count;
loff_t w_pos;
u32 w_cpos;
unsigned int w_finished_copy;
/* This is true if page_size > cluster_size */
unsigned int w_large_pages;
/* Filler callback and private data */
ocfs2_page_writer *w_write_data_page;
void *w_private;
/* Only valid for the filler callback */
struct page *w_this_page;
unsigned int w_this_page_new;
};
struct ocfs2_buffered_write_priv {
char *b_src_buf;
const struct iovec *b_cur_iov; /* Current iovec */
size_t b_cur_off; /* Offset in the
* current iovec */
};
int ocfs2_map_and_write_user_data(struct inode *inode,
struct ocfs2_write_ctxt *wc,
u64 *p_blkno,
unsigned int *ret_from,
unsigned int *ret_to);
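A minimal sketch, assuming only the declarations above, of how a buffered write path might drive the per-cluster writer with the user-data actor; the wrapper name and the surrounding locking and error handling are assumptions, not code from this commit.

static ssize_t example_buffered_write(struct file *file, const struct iovec *iov,
				      loff_t pos, size_t count)
{
	struct ocfs2_buffered_write_priv bp = {
		.b_cur_iov = iov,	/* iovec we are currently copying from */
		.b_cur_off = 0,		/* offset within that iovec */
	};

	/* The actor maps and copies one page of user data per call. */
	return ocfs2_buffered_write_cluster(file, pos, count,
					    ocfs2_map_and_write_user_data, &bp);
}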
struct ocfs2_splice_write_priv {
struct splice_desc *s_sd;
struct pipe_buffer *s_buf;
struct pipe_inode_info *s_pipe;
/* Neither offset value is ever larger than one page */
unsigned int s_offset;
unsigned int s_buf_offset;
};
int ocfs2_map_and_write_splice_data(struct inode *inode,
struct ocfs2_write_ctxt *wc,
u64 *p_blkno,
unsigned int *ret_from,
unsigned int *ret_to);
/* all ocfs2_dio_end_io()'s fault */
#define ocfs2_iocb_is_rw_locked(iocb) \
test_bit(0, (unsigned long *)&iocb->private)
#define ocfs2_iocb_set_rw_locked(iocb) \
set_bit(0, (unsigned long *)&iocb->private)
static inline void ocfs2_iocb_set_rw_locked(struct kiocb *iocb, int level)
{
set_bit(0, (unsigned long *)&iocb->private);
if (level)
set_bit(1, (unsigned long *)&iocb->private);
else
clear_bit(1, (unsigned long *)&iocb->private);
}
#define ocfs2_iocb_clear_rw_locked(iocb) \
clear_bit(0, (unsigned long *)&iocb->private)
#define ocfs2_iocb_rw_locked_level(iocb) \
test_bit(1, (unsigned long *)&iocb->private)
#endif /* OCFS2_FILE_H */
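A minimal sketch of the intended flow for the bits above: the write path records which rw lock level it took in the iocb, and the aio/dio completion path reads it back so it can drop the right lock. The two helpers are illustrative only; the real call sites are elsewhere in this series.

static int example_dio_start(struct kiocb *iocb, struct inode *inode, int level)
{
	int ret;

	ret = ocfs2_rw_lock(inode, level);
	if (ret < 0)
		return ret;

	ocfs2_iocb_set_rw_locked(iocb, level);	/* remember PR vs EX */
	return 0;
}

static void example_dio_end(struct kiocb *iocb, struct inode *inode)
{
	if (ocfs2_iocb_is_rw_locked(iocb)) {
		int level = ocfs2_iocb_rw_locked_level(iocb);

		ocfs2_rw_unlock(inode, level);
		ocfs2_iocb_clear_rw_locked(iocb);
	}
}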
@@ -46,6 +46,7 @@
#include <linux/kernel.h>
#include <linux/slab.h>
#include <linux/workqueue.h>
#include <linux/reboot.h>
#include "heartbeat.h"
#include "nodemanager.h"
@@ -72,7 +73,9 @@ static void o2quo_fence_self(void)
/* panic spins with interrupts enabled. with preempt
* threads can still schedule, etc, etc */
o2hb_stop_all_regions();
panic("ocfs2 is very sorry to be fencing this system by panicing\n");
printk("ocfs2 is very sorry to be fencing this system by restarting\n");
emergency_restart();
}
/* Indicate that a timeout occurred on a heartbeat region write. The
...
@@ -38,6 +38,9 @@
* locking semantics of the file system using the protocol. It should
* be somewhere else, I'm sure, but right now it isn't.
*
* New in version 8:
* - Replace delete inode votes with a cluster lock
*
* New in version 7:
* - DLM join domain includes the live nodemap
*
@@ -57,7 +60,7 @@
* - full 64 bit i_size in the metadata lock lvbs
* - introduction of "rw" lock and pushing meta/data locking down
*/
#define O2NET_PROTOCOL_VERSION 7ULL
#define O2NET_PROTOCOL_VERSION 8ULL
struct o2net_handshake {
__be64 protocol_version;
__be64 connector_id;
...
@@ -358,15 +358,17 @@ int ocfs2_do_extend_dir(struct super_block *sb,
{
int status;
int extend;
u64 p_blkno;
u64 p_blkno, v_blkno;
spin_lock(&OCFS2_I(dir)->ip_lock);
extend = (i_size_read(dir) == ocfs2_clusters_to_bytes(sb, OCFS2_I(dir)->ip_clusters));
spin_unlock(&OCFS2_I(dir)->ip_lock);
if (extend) {
status = ocfs2_do_extend_allocation(OCFS2_SB(sb), dir, 1,
parent_fe_bh, handle,
u32 offset = OCFS2_I(dir)->ip_clusters;
status = ocfs2_do_extend_allocation(OCFS2_SB(sb), dir, &offset,
1, parent_fe_bh, handle,
data_ac, meta_ac, NULL);
BUG_ON(status == -EAGAIN);
if (status < 0) {
@@ -375,9 +377,8 @@ int ocfs2_do_extend_dir(struct super_block *sb,
}
}
status = ocfs2_extent_map_get_blocks(dir, (dir->i_blocks >>
(sb->s_blocksize_bits - 9)),
1, &p_blkno, NULL);
v_blkno = ocfs2_blocks_for_bytes(sb, i_size_read(dir));
status = ocfs2_extent_map_get_blocks(dir, v_blkno, &p_blkno, NULL, NULL);
if (status < 0) {
mlog_errno(status);
goto bail;
@@ -486,7 +487,7 @@ static int ocfs2_extend_dir(struct ocfs2_super *osb,
dir_i_size += dir->i_sb->s_blocksize;
i_size_write(dir, dir_i_size);
dir->i_blocks = ocfs2_align_bytes_to_sectors(dir_i_size);
dir->i_blocks = ocfs2_inode_sector_count(dir);
status = ocfs2_mark_inode_dirty(handle, dir, parent_fe_bh);
if (status < 0) {
mlog_errno(status);
...
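The old code derived i_blocks from i_size, which overstates allocation once files can have holes; the new ocfs2_inode_sector_count() is defined elsewhere in this series, but it presumably derives the count from the allocated clusters, roughly along these lines (an approximation, not the committed definition):

static inline u64 example_inode_sector_count(struct inode *inode)
{
	/* Convert allocated clusters to 512-byte sectors. */
	int c_to_s_bits = OCFS2_SB(inode->i_sb)->s_clustersize_bits - 9;

	return (u64)OCFS2_I(inode)->ip_clusters << c_to_s_bits;
}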
@@ -430,11 +430,10 @@ redo_bucket:
dlm_lockres_put(res);
cond_resched_lock(&dlm->spinlock);
if (dropped)
goto redo_bucket;
}
cond_resched_lock(&dlm->spinlock);
num += n;
mlog(0, "%s: touched %d lockreses in bucket %d "
"(tot=%d)\n", dlm->name, n, i, num);
@@ -1035,7 +1034,7 @@ static int dlm_try_to_join_domain(struct dlm_ctxt *dlm)
{
int status = 0, tmpstat, node;
struct domain_join_ctxt *ctxt;
enum dlm_query_join_response response;
enum dlm_query_join_response response = JOIN_DISALLOW;
mlog_entry("%p", dlm);
...
@@ -611,6 +611,7 @@ static int dlm_remaster_locks(struct dlm_ctxt *dlm, u8 dead_node)
}
} while (status != 0);
spin_lock(&dlm_reco_state_lock);
switch (ndata->state) {
case DLM_RECO_NODE_DATA_INIT:
case DLM_RECO_NODE_DATA_FINALIZE_SENT:
@@ -641,6 +642,7 @@ static int dlm_remaster_locks(struct dlm_ctxt *dlm, u8 dead_node)
ndata->node_num, dead_node);
break;
}
spin_unlock(&dlm_reco_state_lock);
}
mlog(0, "done requesting all lock info\n");
...
@@ -225,11 +225,17 @@ static struct ocfs2_lock_res_ops ocfs2_dentry_lops = {
.flags = 0,
};
static struct ocfs2_lock_res_ops ocfs2_inode_open_lops = {
.get_osb = ocfs2_get_inode_osb,
.flags = 0,
};
static inline int ocfs2_is_inode_lock(struct ocfs2_lock_res *lockres)
{
return lockres->l_type == OCFS2_LOCK_TYPE_META ||
lockres->l_type == OCFS2_LOCK_TYPE_DATA ||
lockres->l_type == OCFS2_LOCK_TYPE_RW;
lockres->l_type == OCFS2_LOCK_TYPE_RW ||
lockres->l_type == OCFS2_LOCK_TYPE_OPEN;
}
static inline struct inode *ocfs2_lock_res_inode(struct ocfs2_lock_res *lockres)
@@ -373,6 +379,9 @@ void ocfs2_inode_lock_res_init(struct ocfs2_lock_res *res,
case OCFS2_LOCK_TYPE_DATA:
ops = &ocfs2_inode_data_lops;
break;
case OCFS2_LOCK_TYPE_OPEN:
ops = &ocfs2_inode_open_lops;
break;
default:
mlog_bug_on_msg(1, "type: %d\n", type);
ops = NULL; /* thanks, gcc */
@@ -1129,6 +1138,12 @@ int ocfs2_create_new_inode_locks(struct inode *inode)
goto bail;
}
ret = ocfs2_create_new_lock(osb, &OCFS2_I(inode)->ip_open_lockres, 0, 0);
if (ret) {
mlog_errno(ret);
goto bail;
}
bail:
mlog_exit(ret);
return ret;
@@ -1182,6 +1197,99 @@ void ocfs2_rw_unlock(struct inode *inode, int write)
mlog_exit_void();
}
/*
* ocfs2_open_lock always gets a PR mode lock.
*/
int ocfs2_open_lock(struct inode *inode)
{
int status = 0;
struct ocfs2_lock_res *lockres;
struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
BUG_ON(!inode);
mlog_entry_void();
mlog(0, "inode %llu take PRMODE open lock\n",
(unsigned long long)OCFS2_I(inode)->ip_blkno);
if (ocfs2_mount_local(osb))
goto out;
lockres = &OCFS2_I(inode)->ip_open_lockres;
status = ocfs2_cluster_lock(OCFS2_SB(inode->i_sb), lockres,
LKM_PRMODE, 0, 0);
if (status < 0)
mlog_errno(status);
out:
mlog_exit(status);
return status;
}
int ocfs2_try_open_lock(struct inode *inode, int write)
{
int status = 0, level;
struct ocfs2_lock_res *lockres;
struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
BUG_ON(!inode);
mlog_entry_void();
mlog(0, "inode %llu try to take %s open lock\n",
(unsigned long long)OCFS2_I(inode)->ip_blkno,
write ? "EXMODE" : "PRMODE");
if (ocfs2_mount_local(osb))
goto out;
lockres = &OCFS2_I(inode)->ip_open_lockres;
level = write ? LKM_EXMODE : LKM_PRMODE;
/*
* The file system may already be holding a PRMODE/EXMODE open lock.
* Since we pass LKM_NOQUEUE, the request won't block waiting on
* other nodes and the -EAGAIN will indicate to the caller that
* this inode is still in use.
*/
status = ocfs2_cluster_lock(OCFS2_SB(inode->i_sb), lockres,
level, LKM_NOQUEUE, 0);
out:
mlog_exit(status);
return status;
}
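A minimal sketch, assuming the try-lock above, of how a caller such as the inode wipe path could interpret the result: -EAGAIN means another node still holds the open lock, so the inode is still in use. The helper below is illustrative and not the code added elsewhere in this series.

static int example_inode_is_unused(struct inode *inode)
{
	int status = ocfs2_try_open_lock(inode, 1);	/* EX, LKM_NOQUEUE */

	if (status == -EAGAIN)
		return 0;	/* still open somewhere in the cluster */
	if (status < 0) {
		mlog_errno(status);
		return status;
	}

	return 1;		/* we hold the EX open lock: safe to wipe */
}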
/*
* ocfs2_open_unlock unlocks PR and EX mode open locks.
*/
void ocfs2_open_unlock(struct inode *inode)
{
struct ocfs2_lock_res *lockres = &OCFS2_I(inode)->ip_open_lockres;
struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
mlog_entry_void();
mlog(0, "inode %llu drop open lock\n",
(unsigned long long)OCFS2_I(inode)->ip_blkno);
if (ocfs2_mount_local(osb))
goto out;
if (lockres->l_ro_holders)
ocfs2_cluster_unlock(OCFS2_SB(inode->i_sb), lockres,
LKM_PRMODE);
if (lockres->l_ex_holders)
ocfs2_cluster_unlock(OCFS2_SB(inode->i_sb), lockres,
LKM_EXMODE);
out:
mlog_exit_void();
}
int ocfs2_data_lock_full(struct inode *inode,
int write,
int arg_flags)
@@ -1387,8 +1495,7 @@ static void ocfs2_refresh_inode_from_lvb(struct inode *inode)
if (S_ISLNK(inode->i_mode) && !oi->ip_clusters)
inode->i_blocks = 0;
else
inode->i_blocks =
ocfs2_align_bytes_to_sectors(i_size_read(inode));
inode->i_blocks = ocfs2_inode_sector_count(inode);
inode->i_uid = be32_to_cpu(lvb->lvb_iuid);
inode->i_gid = be32_to_cpu(lvb->lvb_igid);
@@ -1479,12 +1586,15 @@ static int ocfs2_meta_lock_update(struct inode *inode,
{
int status = 0;
struct ocfs2_inode_info *oi = OCFS2_I(inode);
struct ocfs2_lock_res *lockres = NULL;
struct ocfs2_lock_res *lockres = &oi->ip_meta_lockres;
struct ocfs2_dinode *fe;
struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
mlog_entry_void();
if (ocfs2_mount_local(osb))
goto bail;
spin_lock(&oi->ip_lock);
if (oi->ip_flags & OCFS2_INODE_DELETED) {
mlog(0, "Orphaned inode %llu was deleted while we "
@@ -1496,22 +1606,16 @@ static int ocfs2_meta_lock_update(struct inode *inode,
}
spin_unlock(&oi->ip_lock);
if (!ocfs2_mount_local(osb)) {
lockres = &oi->ip_meta_lockres;
if (!ocfs2_should_refresh_lock_res(lockres))
goto bail;
}
if (!ocfs2_should_refresh_lock_res(lockres))
goto bail;
/* This will discard any caching information we might have had
* for the inode metadata. */
ocfs2_metadata_cache_purge(inode);
/* will do nothing for inode types that don't use the extent
* map (directories, bitmap files, etc) */
ocfs2_extent_map_trunc(inode, 0);
if (lockres && ocfs2_meta_lvb_is_trustable(inode, lockres)) {
if (ocfs2_meta_lvb_is_trustable(inode, lockres)) {
mlog(0, "Trusting LVB on inode %llu\n",
(unsigned long long)oi->ip_blkno);
ocfs2_refresh_inode_from_lvb(inode);
@@ -1558,8 +1662,7 @@ static int ocfs2_meta_lock_update(struct inode *inode,
status = 0;
bail_refresh:
if (lockres)
ocfs2_complete_lock_res_refresh(lockres, status);
ocfs2_complete_lock_res_refresh(lockres, status);
bail:
mlog_exit(status);
return status;
@@ -1630,7 +1733,6 @@ int ocfs2_meta_lock_full(struct inode *inode,
wait_event(osb->recovery_event,
ocfs2_node_map_is_empty(osb, &osb->recovery_map));
acquired = 0;
lockres = &OCFS2_I(inode)->ip_meta_lockres;
level = ex ? LKM_EXMODE : LKM_PRMODE;
dlm_flags = 0;
@@ -2458,12 +2560,19 @@ int ocfs2_drop_inode_locks(struct inode *inode)
* ocfs2_clear_inode has done it for us. */
err = ocfs2_drop_lock(OCFS2_SB(inode->i_sb),
&OCFS2_I(inode)->ip_data_lockres);
&OCFS2_I(inode)->ip_open_lockres);
if (err < 0)
mlog_errno(err);
status = err;
err = ocfs2_drop_lock(OCFS2_SB(inode->i_sb),
&OCFS2_I(inode)->ip_data_lockres);
if (err < 0)
mlog_errno(err);
if (err < 0 && !status)
status = err;
err = ocfs2_drop_lock(OCFS2_SB(inode->i_sb),
&OCFS2_I(inode)->ip_meta_lockres);
if (err < 0)
...
@@ -80,6 +80,9 @@ void ocfs2_data_unlock(struct inode *inode,
int write);
int ocfs2_rw_lock(struct inode *inode, int write);
void ocfs2_rw_unlock(struct inode *inode, int write);
int ocfs2_open_lock(struct inode *inode);
int ocfs2_try_open_lock(struct inode *inode, int write);
void ocfs2_open_unlock(struct inode *inode);
int ocfs2_meta_lock_atime(struct inode *inode,
struct vfsmount *vfsmnt,
int *level);
...
@@ -25,22 +25,29 @@
#ifndef _EXTENT_MAP_H
#define _EXTENT_MAP_H
int init_ocfs2_extent_maps(void);
void exit_ocfs2_extent_maps(void);
struct ocfs2_extent_map_item {
unsigned int ei_cpos;
unsigned int ei_phys;
unsigned int ei_clusters;
unsigned int ei_flags;
/*
* EVERY CALL here except _init, _trunc, and _drop expects alloc_sem
* to be held. The allocation cannot change at all while the map is
* in the process of being updated.
*/
int ocfs2_extent_map_init(struct inode *inode);
int ocfs2_extent_map_append(struct inode *inode,
struct ocfs2_extent_rec *rec,
u32 new_clusters);
int ocfs2_extent_map_get_blocks(struct inode *inode,
u64 v_blkno, int count,
u64 *p_blkno, int *ret_count);
int ocfs2_extent_map_drop(struct inode *inode, u32 new_clusters);
int ocfs2_extent_map_trunc(struct inode *inode, u32 new_clusters);
struct list_head ei_list;
};
#define OCFS2_MAX_EXTENT_MAP_ITEMS 3
struct ocfs2_extent_map {
unsigned int em_num_items;
struct list_head em_list;
};
void ocfs2_extent_map_init(struct inode *inode);
void ocfs2_extent_map_trunc(struct inode *inode, unsigned int cluster);
void ocfs2_extent_map_insert_rec(struct inode *inode,
struct ocfs2_extent_rec *rec);
int ocfs2_get_clusters(struct inode *inode, u32 v_cluster, u32 *p_cluster,
u32 *num_clusters, unsigned int *extent_flags);
int ocfs2_extent_map_get_blocks(struct inode *inode, u64 v_blkno, u64 *p_blkno,
u64 *ret_count, unsigned int *extent_flags);
#endif /* _EXTENT_MAP_H */
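A minimal sketch of the reworked mapping API above: a caller asks for the physical block behind a logical block and gets back a contiguous count plus per-extent flags. A zero p_blkno indicates a hole; the OCFS2_EXT_UNWRITTEN flag name is assumed from this series.

static int example_map_block(struct inode *inode, u64 v_blkno)
{
	u64 p_blkno, count;
	unsigned int ext_flags = 0;
	int ret;

	ret = ocfs2_extent_map_get_blocks(inode, v_blkno, &p_blkno,
					  &count, &ext_flags);
	if (ret) {
		mlog_errno(ret);
		return ret;
	}

	if (p_blkno == 0 || (ext_flags & OCFS2_EXT_UNWRITTEN))
		return 1;	/* hole or unwritten extent: reads see zeros */

	return 0;		/* allocated, written data at p_blkno */
}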