Commit 65b0b44d authored by Philipp Reisner

Tracking DRBD mainline (and minor cleanups)

   * drbd-8.3: (134 commits)
Missing pieces of the unaligned memory access stuff.
      possible fix for XEN crashes on disconnect
      fix regression: initial sync target hung in WFBitMapT
      fix a comment: there are no more ioctls.
      ...

Removed compat code from lru_cache.h
All STATIC -> static
DRBD_ENABLE_FAULTS -> CONFIG_DRBD_FAULT_INJECTION

    * drbd-8.3:
Fixed some errors/warnings when compiled without DBG_ALL_SYMBOLS (i.e. STATIC = static)
      Fixed a regression introduced with fb51e2eb1fac83839231499333bf683629388484

No longer include drbd_config.h directly, include drbd.h instead
Got rid of drbd_config.h
Support lru_cache as module
Removing the drbd_buildtag.c file

    * drbd-8.3:
Fixes for architectures that do not support unaligned memory accesses
      fix reading of the AL ring buffer
      sync handshake: fix detection of "unrelated" data - it was detected as "regular" split-brain

    * drbd-8.3:
      Preparing 8.3.2rc2
      compat: 2.6.31 -- q->limits.* and accessor functions
Signed-off-by: Philipp Reisner <philipp.reisner@linbit.com>
Signed-off-by: Lars Ellenberg <lars.ellenberg@linbit.com>
parent b8e44af9
......@@ -46,3 +46,35 @@ config DRBD_TRACE
Say Y here if you want to be able to trace various events in DRBD.
If unsure, say N.
config DRBD_FAULT_INJECTION
bool "DRBD fault injection"
depends on BLK_DEV_DRBD
help
Say Y here if you want to simulate IO errors, in order to test DRBD's
behavior.
The actual simulation of IO errors is done by writing 3 values to
/sys/module/drbd/parameters/
enable_faults: bitmask of...
1 meta data write
2 read
4 resync data write
8 read
16 data write
32 data read
64 read ahead
128 kmalloc of bitmap
256 allocation of EE (epoch_entries)
fault_devs: bitmask of minor numbers
fault_rate: frequency in percent
Example: Simulate data write errors on /dev/drbd0 with a probability of 5%.
echo 16 > /sys/module/drbd/parameters/enable_faults
echo 1 > /sys/module/drbd/parameters/fault_devs
echo 5 > /sys/module/drbd/parameters/fault_rate
If unsure, say N.
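Taken together, the three parameters form a per-type, per-device, probabilistic gate. A standalone sketch of that logic (helper name and policy are illustrative, not DRBD's in-tree implementation):

#include <stdio.h>
#include <stdlib.h>

/* Hypothetical model: inject a fault only when the fault type's bit
 * is set in enable_faults, the device's minor bit is set in
 * fault_devs, and a percentage roll falls below fault_rate. */
static unsigned int enable_faults = 16;	/* bit 4: data write */
static unsigned int fault_devs = 1;	/* minor 0 */
static unsigned int fault_rate = 5;	/* percent */

static int should_inject(unsigned int fault_type, unsigned int minor)
{
	if (!(enable_faults & (1u << fault_type)))
		return 0;
	if (!(fault_devs & (1u << minor)))
		return 0;
	return (unsigned int)(rand() % 100) < fault_rate;
}

int main(void)
{
	/* fault type 4 == data write (bit value 16 in the table above) */
	printf("inject: %d\n", should_inject(4, 0));
	return 0;
}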
drbd-y := drbd_buildtag.o drbd_bitmap.o drbd_proc.o
drbd-y := drbd_bitmap.o drbd_proc.o
drbd-y += drbd_worker.o drbd_receiver.o drbd_req.o drbd_actlog.o
drbd-y += drbd_main.o drbd_strings.o drbd_nl.o
......
......@@ -77,7 +77,7 @@ void trace_drbd_resync(struct drbd_conf *mdev, int level, const char *fmt, ...)
va_end(ap);
}
STATIC int _drbd_md_sync_page_io(struct drbd_conf *mdev,
static int _drbd_md_sync_page_io(struct drbd_conf *mdev,
struct drbd_backing_dev *bdev,
struct page *page, sector_t sector,
int rw, int size)
......@@ -133,7 +133,7 @@ STATIC int _drbd_md_sync_page_io(struct drbd_conf *mdev,
int drbd_md_sync_page_io(struct drbd_conf *mdev, struct drbd_backing_dev *bdev,
sector_t sector, int rw)
{
int hardsect_size, mask, ok;
int logical_block_size, mask, ok;
int offset = 0;
struct page *iop = mdev->md_io_page;
......@@ -141,15 +141,15 @@ int drbd_md_sync_page_io(struct drbd_conf *mdev, struct drbd_backing_dev *bdev,
BUG_ON(!bdev->md_bdev);
hardsect_size = drbd_get_hardsect_size(bdev->md_bdev);
if (hardsect_size == 0)
hardsect_size = MD_SECTOR_SIZE;
logical_block_size = bdev_logical_block_size(bdev->md_bdev);
if (logical_block_size == 0)
logical_block_size = MD_SECTOR_SIZE;
/* in case hardsect_size != 512 [ s390 only? ] */
if (hardsect_size != MD_SECTOR_SIZE) {
mask = (hardsect_size / MD_SECTOR_SIZE) - 1;
/* in case logical_block_size != 512 [ s390 only? ] */
if (logical_block_size != MD_SECTOR_SIZE) {
mask = (logical_block_size / MD_SECTOR_SIZE) - 1;
D_ASSERT(mask == 1 || mask == 3 || mask == 7);
D_ASSERT(hardsect_size == (mask+1) * MD_SECTOR_SIZE);
D_ASSERT(logical_block_size == (mask+1) * MD_SECTOR_SIZE);
offset = sector & mask;
sector = sector & ~mask;
iop = mdev->md_io_tmpp;
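With a hypothetical 4096-byte logical block size, the mask/offset arithmetic above works out as in this standalone sketch:

#include <stdio.h>

int main(void)
{
	const unsigned long long MD_SECTOR_SIZE = 512;
	unsigned long long logical_block_size = 4096;	/* e.g. s390 DASD */
	unsigned long long sector = 13;			/* in 512-byte units */

	/* mask selects the position within one logical block */
	unsigned long long mask = logical_block_size / MD_SECTOR_SIZE - 1;
	printf("mask=%llu offset=%llu aligned=%llu\n",
	       mask, sector & mask, sector & ~mask);	/* 7, 5, 8 */
	return 0;
}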
......@@ -161,11 +161,11 @@ int drbd_md_sync_page_io(struct drbd_conf *mdev, struct drbd_backing_dev *bdev,
void *hp = page_address(mdev->md_io_tmpp);
ok = _drbd_md_sync_page_io(mdev, bdev, iop, sector,
READ, hardsect_size);
READ, logical_block_size);
if (unlikely(!ok)) {
dev_err(DEV, "drbd_md_sync_page_io(,%llus,"
"READ [hardsect_size!=512]) failed!\n",
"READ [logical_block_size!=512]) failed!\n",
(unsigned long long)sector);
return 0;
}
......@@ -180,14 +180,14 @@ int drbd_md_sync_page_io(struct drbd_conf *mdev, struct drbd_backing_dev *bdev,
current->comm, current->pid, __func__,
(unsigned long long)sector, (rw & WRITE) ? "WRITE" : "READ");
ok = _drbd_md_sync_page_io(mdev, bdev, iop, sector, rw, hardsect_size);
ok = _drbd_md_sync_page_io(mdev, bdev, iop, sector, rw, logical_block_size);
if (unlikely(!ok)) {
dev_err(DEV, "drbd_md_sync_page_io(,%llus,%s) failed!\n",
(unsigned long long)sector, (rw & WRITE) ? "WRITE" : "READ");
return 0;
}
if (hardsect_size != MD_SECTOR_SIZE && !(rw & WRITE)) {
if (logical_block_size != MD_SECTOR_SIZE && !(rw & WRITE)) {
void *p = page_address(mdev->md_io_page);
void *hp = page_address(mdev->md_io_tmpp);
......@@ -378,7 +378,7 @@ w_al_write_transaction(struct drbd_conf *mdev, struct drbd_work *w, int unused)
*
* Returns -1 on IO error, 0 on checksum error and 1 upon success.
*/
STATIC int drbd_al_read_tr(struct drbd_conf *mdev,
static int drbd_al_read_tr(struct drbd_conf *mdev,
struct drbd_backing_dev *bdev,
struct al_transaction *b,
int index)
......@@ -416,14 +416,14 @@ int drbd_al_read_log(struct drbd_conf *mdev, struct drbd_backing_dev *bdev)
int i;
int rv;
int mx;
int cnr;
int active_extents = 0;
int transactions = 0;
int overflow = 0;
int from = -1;
int to = -1;
u32 from_tnr = -1;
int found_valid = 0;
int from = 0;
int to = 0;
u32 from_tnr = 0;
u32 to_tnr = 0;
u32 cnr;
mx = div_ceil(mdev->act_log->nr_elements, AL_EXTENTS_PT);
......@@ -444,22 +444,27 @@ int drbd_al_read_log(struct drbd_conf *mdev, struct drbd_backing_dev *bdev)
}
cnr = be32_to_cpu(buffer->tr_number);
if (cnr == -1)
overflow = 1;
if (cnr < from_tnr && !overflow) {
if (++found_valid == 1) {
from = i;
to = i;
from_tnr = cnr;
to_tnr = cnr;
continue;
}
if ((int)cnr - (int)from_tnr < 0) {
D_ASSERT(from_tnr - cnr + i - from == mx+1);
from = i;
from_tnr = cnr;
}
if (cnr > to_tnr) {
if ((int)cnr - (int)to_tnr > 0) {
D_ASSERT(cnr - to_tnr == i - to);
to = i;
to_tnr = cnr;
}
}
if (from == -1 || to == -1) {
if (!found_valid) {
dev_warn(DEV, "No usable activity log found.\n");
mutex_unlock(&mdev->md_io_mutex);
return 1;
}
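The comparisons on the transaction numbers above use signed 32-bit subtraction, which keeps the ordering correct even when tr_number wraps around; a standalone model with made-up values:

#include <stdio.h>
#include <stdint.h>

/* (int)a - (int)b < 0 iff a is "older" than b in modulo-2^32
 * arithmetic, as long as the two values are less than 2^31 apart. */
static int is_older(uint32_t a, uint32_t b)
{
	return (int32_t)(a - b) < 0;
}

int main(void)
{
	printf("%d\n", is_older(5, 10));		/* 1: plainly older */
	printf("%d\n", is_older(4294967294u, 3));	/* 1: older across the wrap */
	printf("%d\n", is_older(3, 4294967294u));	/* 0: newer across the wrap */
	return 0;
}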
......@@ -524,7 +529,7 @@ cancel:
return 1;
}
STATIC void atodb_endio(struct bio *bio, int error)
static void atodb_endio(struct bio *bio, int error)
{
struct drbd_atodb_wait *wc = bio->bi_private;
struct drbd_conf *mdev = wc->mdev;
......@@ -555,7 +560,7 @@ STATIC void atodb_endio(struct bio *bio, int error)
#define S2W(s) ((s)<<(BM_EXT_SHIFT-BM_BLOCK_SHIFT-LN2_BPL))
/* activity log to on disk bitmap -- prepare bio unless that sector
* is already covered by previously prepared bios */
STATIC int atodb_prepare_unless_covered(struct drbd_conf *mdev,
static int atodb_prepare_unless_covered(struct drbd_conf *mdev,
struct bio **bios,
unsigned int enr,
struct drbd_atodb_wait *wc) __must_hold(local)
......@@ -803,7 +808,7 @@ void drbd_al_shrink(struct drbd_conf *mdev)
wake_up(&mdev->al_wait);
}
STATIC int w_update_odbm(struct drbd_conf *mdev, struct drbd_work *w, int unused)
static int w_update_odbm(struct drbd_conf *mdev, struct drbd_work *w, int unused)
{
struct update_odbm_work *udw = (struct update_odbm_work *)w;
......@@ -840,7 +845,7 @@ STATIC int w_update_odbm(struct drbd_conf *mdev, struct drbd_work *w, int unused
*
* TODO will be obsoleted once we have a caching lru of the on disk bitmap
*/
STATIC void drbd_try_clear_on_disk_bm(struct drbd_conf *mdev, sector_t sector,
static void drbd_try_clear_on_disk_bm(struct drbd_conf *mdev, sector_t sector,
int count, int success)
{
struct lc_element *e;
......
......@@ -26,6 +26,7 @@
#include <linux/vmalloc.h>
#include <linux/string.h>
#include <linux/drbd.h>
#include <asm/kmap_types.h>
#include "drbd_int.h"
/* OPAQUE outside this file!
......@@ -150,7 +151,7 @@ void drbd_bm_unlock(struct drbd_conf *mdev)
}
/* word offset to long pointer */
STATIC unsigned long *__bm_map_paddr(struct drbd_bitmap *b, unsigned long offset, const enum km_type km)
static unsigned long *__bm_map_paddr(struct drbd_bitmap *b, unsigned long offset, const enum km_type km)
{
struct page *page;
unsigned long page_nr;
......@@ -197,7 +198,7 @@ void bm_unmap(unsigned long *p_addr)
* to be able to report device specific.
*/
STATIC void bm_free_pages(struct page **pages, unsigned long number)
static void bm_free_pages(struct page **pages, unsigned long number)
{
unsigned long i;
if (!pages)
......@@ -215,7 +216,7 @@ STATIC void bm_free_pages(struct page **pages, unsigned long number)
}
}
STATIC void bm_vk_free(void *ptr, int v)
static void bm_vk_free(void *ptr, int v)
{
if (v)
vfree(ptr);
......@@ -226,7 +227,7 @@ STATIC void bm_vk_free(void *ptr, int v)
/*
* "have" and "want" are NUMBER OF PAGES.
*/
STATIC struct page **bm_realloc_pages(struct drbd_bitmap *b, unsigned long want)
static struct page **bm_realloc_pages(struct drbd_bitmap *b, unsigned long want)
{
struct page **old_pages = b->bm_pages;
struct page **new_pages, *page;
......@@ -239,7 +240,11 @@ STATIC struct page **bm_realloc_pages(struct drbd_bitmap *b, unsigned long want)
if (have == want)
return old_pages;
/* Trying kmalloc first, falling back to vmalloc... */
/* Trying kmalloc first, falling back to vmalloc.
* GFP_KERNEL is ok, as this is done when a lower level disk is
* "attached" to the drbd. Context is receiver thread or cqueue
* thread. As we have no disk yet, we are not in the IO path,
* not even the IO path of the peer. */
bytes = sizeof(struct page *)*want;
new_pages = kmalloc(bytes, GFP_KERNEL);
if (!new_pages) {
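The fallback pattern used here, reduced to a minimal kernel-style sketch (function names are illustrative): prefer physically contiguous kmalloc, fall back to vmalloc, and remember which allocator succeeded so the matching free routine can be used later (cf. bm_vk_free() above).

#include <linux/slab.h>
#include <linux/vmalloc.h>

static void *alloc_ptr_array(size_t n, int *vmalloced)
{
	void *p;

	*vmalloced = 0;
	p = kmalloc(n * sizeof(void *), GFP_KERNEL);
	if (!p) {
		p = vmalloc(n * sizeof(void *));
		*vmalloced = 1;
	}
	return p;
}

static void free_ptr_array(void *p, int vmalloced)
{
	if (vmalloced)
		vfree(p);
	else
		kfree(p);
}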
......@@ -320,7 +325,7 @@ void drbd_bm_cleanup(struct drbd_conf *mdev)
* this masks out the remaining bits.
* Returns the number of bits cleared.
*/
STATIC int bm_clear_surplus(struct drbd_bitmap *b)
static int bm_clear_surplus(struct drbd_bitmap *b)
{
const unsigned long mask = (1UL << (b->bm_bits & (BITS_PER_LONG-1))) - 1;
size_t w = b->bm_bits >> LN2_BPL;
......@@ -343,7 +348,7 @@ STATIC int bm_clear_surplus(struct drbd_bitmap *b)
return cleared;
}
STATIC void bm_set_surplus(struct drbd_bitmap *b)
static void bm_set_surplus(struct drbd_bitmap *b)
{
const unsigned long mask = (1UL << (b->bm_bits & (BITS_PER_LONG-1))) - 1;
size_t w = b->bm_bits >> LN2_BPL;
......@@ -362,7 +367,7 @@ STATIC void bm_set_surplus(struct drbd_bitmap *b)
bm_unmap(p_addr);
}
STATIC unsigned long __bm_count_bits(struct drbd_bitmap *b, const int swap_endian)
static unsigned long __bm_count_bits(struct drbd_bitmap *b, const int swap_endian)
{
unsigned long *p_addr, *bm, offset = 0;
unsigned long bits = 0;
......@@ -420,7 +425,7 @@ void _drbd_bm_recount_bits(struct drbd_conf *mdev, char *file, int line)
}
/* offset and len in long words.*/
STATIC void bm_memset(struct drbd_bitmap *b, size_t offset, int c, size_t len)
static void bm_memset(struct drbd_bitmap *b, size_t offset, int c, size_t len)
{
unsigned long *p_addr, *bm;
size_t do_now, end;
......@@ -752,7 +757,7 @@ static void bm_async_io_complete(struct bio *bio, int error)
bio_put(bio);
}
STATIC void bm_page_io_async(struct drbd_conf *mdev, struct drbd_bitmap *b, int page_nr, int rw) __must_hold(local)
static void bm_page_io_async(struct drbd_conf *mdev, struct drbd_bitmap *b, int page_nr, int rw) __must_hold(local)
{
/* we are process context. we always get a bio */
struct bio *bio = bio_alloc(GFP_KERNEL, 1);
......@@ -790,6 +795,8 @@ void bm_cpu_to_lel(struct drbd_bitmap *b)
* this may be optimized by using
* cpu_to_lel(-1) == -1 and cpu_to_lel(0) == 0;
* the following is still not optimal, but better than nothing */
unsigned int i;
unsigned long *p_addr, *bm;
if (b->bm_set == 0) {
/* no page at all; avoid swap if all is 0 */
i = b->bm_number_of_pages;
......@@ -801,12 +808,10 @@ void bm_cpu_to_lel(struct drbd_bitmap *b)
i = 0;
}
for (; i < b->bm_number_of_pages; i++) {
unsigned long *bm;
/* if you'd want to use kmap_atomic, you'd have to disable irq! */
p_addr = kmap(b->bm_pages[i]);
p_addr = kmap_atomic(b->bm_pages[i], KM_USER0);
for (bm = p_addr; bm < p_addr + PAGE_SIZE/sizeof(long); bm++)
*bm = cpu_to_lel(*bm);
kunmap(p_addr);
kunmap_atomic(p_addr, KM_USER0);
}
}
# endif
......@@ -816,7 +821,7 @@ void bm_cpu_to_lel(struct drbd_bitmap *b)
/*
* bm_rw: read/write the whole bitmap from/to its on disk location.
*/
STATIC int bm_rw(struct drbd_conf *mdev, int rw) __must_hold(local)
static int bm_rw(struct drbd_conf *mdev, int rw) __must_hold(local)
{
struct drbd_bitmap *b = mdev->bitmap;
/* sector_t sector; */
......
/* automatically generated. DO NOT EDIT. */
#include <linux/drbd_config.h>
const char *drbd_buildtag(void)
{
return "GIT-hash: b0abb3832a730d4fbd145013f6f51fc977bba3cc drbd/drbd_int.h"
" build by phil@fat-tyre, 2009-05-15 11:54:26";
}
......@@ -106,22 +106,6 @@ extern char usermode_helper[];
struct drbd_conf;
#ifdef DBG_ALL_SYMBOLS
# define STATIC
#else
# define STATIC static
#endif
/*
* Some Message Macros
*************************/
#define DUMPP(A) dev_err(DEV, #A " = %p in %s:%d\n", (A), __FILE__, __LINE__);
#define DUMPLU(A) dev_err(DEV, #A " = %lu in %s:%d\n", (unsigned long)(A), __FILE__, __LINE__);
#define DUMPLLU(A) dev_err(DEV, #A " = %llu in %s:%d\n", (unsigned long long)(A), __FILE__, __LINE__);
#define DUMPLX(A) dev_err(DEV, #A " = %lx in %s:%d\n", (A), __FILE__, __LINE__);
#define DUMPI(A) dev_err(DEV, #A " = %d in %s:%d\n", (int)(A), __FILE__, __LINE__);
/* to shorten dev_warn(DEV, "msg"); and relatives statements */
#define DEV (disk_to_dev(mdev->vdisk))
......@@ -139,14 +123,14 @@ struct drbd_conf;
/* Defines to control fault insertion */
enum {
DRBD_FAULT_MD_WR = 0, /* meta data write */
DRBD_FAULT_MD_RD, /* read */
DRBD_FAULT_RS_WR, /* resync */
DRBD_FAULT_RS_RD,
DRBD_FAULT_DT_WR, /* data */
DRBD_FAULT_DT_RD,
DRBD_FAULT_DT_RA, /* data read ahead */
DRBD_FAULT_BM_ALLOC, /* bitmap allocation */
DRBD_FAULT_AL_EE, /* alloc ee */
DRBD_FAULT_MD_RD = 1, /* read */
DRBD_FAULT_RS_WR = 2, /* resync */
DRBD_FAULT_RS_RD = 3,
DRBD_FAULT_DT_WR = 4, /* data */
DRBD_FAULT_DT_RD = 5,
DRBD_FAULT_DT_RA = 6, /* data read ahead */
DRBD_FAULT_BM_ALLOC = 7, /* bitmap allocation */
DRBD_FAULT_AL_EE = 8, /* alloc ee */
DRBD_FAULT_MAX,
};
......@@ -332,6 +316,10 @@ static inline void bm_xfer_ctx_bit_to_word_offset(struct bm_xfer_ctx *c)
#endif
}
#ifndef __packed
#define __packed __attribute__((packed))
#endif
/* This is the layout for a packet on the wire.
* The byteorder is the network byte order.
* (except block_id and barrier fields.
......@@ -543,6 +531,7 @@ struct p_compressed_bm {
u8 code[0];
} __packed;
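A minimal illustration of the wire-format convention described above: fixed-width big-endian fields inside a __packed struct, converted with the cpu_to_be32()/cpu_to_be16() helpers at the boundaries. The packet type shown here is hypothetical, not one of DRBD's:

#include <linux/types.h>
#include <asm/byteorder.h>

struct example_pkt {
	__be32 magic;
	__be16 command;
	__be16 length;
} __packed;

static void example_fill(struct example_pkt *p, u16 cmd)
{
	p->magic   = cpu_to_be32(0x12345678);	/* made-up magic */
	p->command = cpu_to_be16(cmd);
	p->length  = cpu_to_be16(sizeof(*p));
}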
/* DCBP: Drbd Compressed Bitmap Packet ... */
static inline enum drbd_bitmap_code
DCBP_get_code(struct p_compressed_bm *p)
{
......@@ -795,6 +784,8 @@ enum {
* but worker thread is still handling the cleanup.
* reconfiguring (nl_disk_conf, nl_net_conf) is disallowed,
* while this is set. */
RESIZE_PENDING, /* Size change detected locally, waiting for the response from
* the peer, if it changed there as well. */
};
struct drbd_bitmap; /* opaque for drbd_conf */
......@@ -946,12 +937,16 @@ struct drbd_conf {
unsigned long rs_mark_time;
/* skipped because csum was equal [unit BM_BLOCK_SIZE] */
unsigned long rs_same_csum;
/* where does the admin want us to start? (sector) */
sector_t ov_start_sector;
/* where are we now? (sector) */
sector_t ov_position;
/* Start sector of out of sync range. */
/* Start sector of out of sync range (to merge printk reporting). */
sector_t ov_last_oos_start;
/* size of out-of-sync range in sectors. */
sector_t ov_last_oos_size;
unsigned long ov_left;
unsigned long ov_left; /* in bits */
struct crypto_hash *csums_tfm;
struct crypto_hash *verify_tfm;
......@@ -991,7 +986,7 @@ struct drbd_conf {
atomic_t pp_in_use;
wait_queue_head_t ee_wait;
struct page *md_io_page; /* one page buffer for md_io */
struct page *md_io_tmpp; /* for hardsect_size != 512 [s390 only?] */
struct page *md_io_tmpp; /* for logical_block_size != 512 */
struct mutex md_io_mutex; /* protects the md_io_buffer */
spinlock_t al_lock;
wait_queue_head_t al_wait;
......@@ -1103,7 +1098,7 @@ extern int drbd_send_protocol(struct drbd_conf *mdev);
extern int drbd_send_uuids(struct drbd_conf *mdev);
extern int drbd_send_uuids_skip_initial_sync(struct drbd_conf *mdev);
extern int drbd_send_sync_uuid(struct drbd_conf *mdev, u64 val);
extern int drbd_send_sizes(struct drbd_conf *mdev);
extern int drbd_send_sizes(struct drbd_conf *mdev, int trigger_reply);
extern int _drbd_send_state(struct drbd_conf *mdev);
extern int drbd_send_state(struct drbd_conf *mdev);
extern int _drbd_send_cmd(struct drbd_conf *mdev, struct socket *sock,
......@@ -1127,8 +1122,6 @@ extern int drbd_send_ack_dp(struct drbd_conf *mdev, enum drbd_packets cmd,
struct p_data *dp);
extern int drbd_send_ack_ex(struct drbd_conf *mdev, enum drbd_packets cmd,
sector_t sector, int blksize, u64 block_id);
extern int _drbd_send_page(struct drbd_conf *mdev, struct page *page,
int offset, size_t size);
extern int drbd_send_block(struct drbd_conf *mdev, enum drbd_packets cmd,
struct drbd_epoch_entry *e);
extern int drbd_send_dblock(struct drbd_conf *mdev, struct drbd_request *req);
......@@ -1348,7 +1341,9 @@ extern int drbd_bm_count_bits(struct drbd_conf *mdev, const unsigned long s, con
/* drbd_main.c */
extern struct kmem_cache *drbd_request_cache;
extern struct kmem_cache *drbd_ee_cache;
extern struct kmem_cache *drbd_ee_cache; /* epoch entries */
extern struct kmem_cache *drbd_bm_ext_cache; /* bitmap extents */
extern struct kmem_cache *drbd_al_ext_cache; /* activity log extents */
extern mempool_t *drbd_request_mempool;
extern mempool_t *drbd_ee_mempool;
......@@ -1388,7 +1383,7 @@ extern int drbd_khelper(struct drbd_conf *mdev, char *cmd);
/* drbd_worker.c */
extern int drbd_worker(struct drbd_thread *thi);
extern void drbd_alter_sa(struct drbd_conf *mdev, int na);
extern int drbd_alter_sa(struct drbd_conf *mdev, int na);
extern void drbd_start_resync(struct drbd_conf *mdev, enum drbd_conns side);
extern void resume_next_sg(struct drbd_conf *mdev);
extern void suspend_other_sg(struct drbd_conf *mdev);
......@@ -1409,7 +1404,7 @@ static inline void ov_oos_print(struct drbd_conf *mdev)
}
void drbd_csum(struct drbd_conf *, struct crypto_hash *, struct bio *, void *);
extern void drbd_csum(struct drbd_conf *, struct crypto_hash *, struct bio *, void *);
/* worker callbacks */
extern int w_req_cancel_conflict(struct drbd_conf *, struct drbd_work *, int);
extern int w_read_retry_remote(struct drbd_conf *, struct drbd_work *, int);
......@@ -1704,9 +1699,11 @@ static inline sector_t drbd_md_last_sector(struct drbd_backing_dev *bdev)
}
}
/* Returns the number of 512 byte sectors of the device */
static inline sector_t drbd_get_capacity(struct block_device *bdev)
{
return bdev ? get_capacity(bdev->bd_disk) : 0;
/* return bdev ? get_capacity(bdev->bd_disk) : 0; */
return bdev ? bdev->bd_inode->i_size >> 9 : 0;
}
/**
......
......@@ -32,11 +32,10 @@
#include <linux/slab.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/drbd_config.h>
#include <linux/drbd.h>
#include "drbd_int.h"
STATIC int drbd_proc_open(struct inode *inode, struct file *file);
static int drbd_proc_open(struct inode *inode, struct file *file);
struct proc_dir_entry *drbd_proc;
......@@ -55,7 +54,7 @@ struct file_operations drbd_proc_fops = {
* [=====>..............] 33.5% (23456/123456)
* finish: 2:20:20 speed: 6,345 (6,456) K/sec
*/
STATIC void drbd_syncer_progress(struct drbd_conf *mdev, struct seq_file *seq)
static void drbd_syncer_progress(struct drbd_conf *mdev, struct seq_file *seq)
{
unsigned long db, dt, dbdt, rt, rs_left;
unsigned int res;
......@@ -134,7 +133,7 @@ STATIC void drbd_syncer_progress(struct drbd_conf *mdev, struct seq_file *seq)
seq_printf(seq, " K/sec\n");
}
STATIC void resync_dump_detail(struct seq_file *seq, struct lc_element *e)
static void resync_dump_detail(struct seq_file *seq, struct lc_element *e)
{
struct bm_extent *bme = lc_entry(e, struct bm_extent, lce);
......@@ -144,7 +143,7 @@ STATIC void resync_dump_detail(struct seq_file *seq, struct lc_element *e)
);
}
STATIC int drbd_seq_show(struct seq_file *seq, void *v)
static int drbd_seq_show(struct seq_file *seq, void *v)
{
int i, hole = 0;
const char *sn;
......@@ -259,7 +258,7 @@ STATIC int drbd_seq_show(struct seq_file *seq, void *v)
return 0;
}
STATIC int drbd_proc_open(struct inode *inode, struct file *file)
static int drbd_proc_open(struct inode *inode, struct file *file)
{
return single_open(file, drbd_seq_show, PDE(inode)->data);
}
......
......@@ -121,8 +121,8 @@ static void _req_is_done(struct drbd_conf *mdev, struct drbd_request *req, const
list_empty(&req->w.list))) {
/* DEBUG ASSERT only; if this triggers, we
* probably corrupt the worker list here */
DUMPP(req->w.list.next);
DUMPP(req->w.list.prev);
dev_err(DEV, "req->w.list.next = %p\n", req->w.list.next);
dev_err(DEV, "req->w.list.prev = %p\n", req->w.list.prev);
}
req->w.cb = w_io_error;
drbd_queue_work(&mdev->data.work, &req->w);
......@@ -326,7 +326,7 @@ void _req_may_be_done(struct drbd_request *req, int error)
* second hlist_for_each_entry becomes a noop. This is even simpler than to
* grab a reference on the net_conf, and check for the two_primaries flag...
*/
STATIC int _req_conflicts(struct drbd_request *req)
static int _req_conflicts(struct drbd_request *req)
{
struct drbd_conf *mdev = req->mdev;
const sector_t sector = req->sector;
......@@ -689,7 +689,7 @@ void _req_mod(struct drbd_request *req, enum drbd_req_event what, int error)
* since size may be bigger than BM_BLOCK_SIZE,
* we may need to check several bits.
*/
STATIC int drbd_may_do_local_read(struct drbd_conf *mdev, sector_t sector, int size)
static int drbd_may_do_local_read(struct drbd_conf *mdev, sector_t sector, int size)
{
unsigned long sbnr, ebnr;
sector_t esector, nr_sectors;
......@@ -713,7 +713,7 @@ STATIC int drbd_may_do_local_read(struct drbd_conf *mdev, sector_t sector, int s
return 0 == drbd_bm_count_bits(mdev, sbnr, ebnr);
}
STATIC int drbd_make_request_common(struct drbd_conf *mdev, struct bio *bio)
static int drbd_make_request_common(struct drbd_conf *mdev, struct bio *bio)
{
const int rw = bio_rw(bio);
const int size = bio->bi_size;
......
......@@ -71,13 +71,13 @@ static const char *drbd_disk_s_names[] = {
static const char *drbd_state_sw_errors[] = {
[-SS_TWO_PRIMARIES] = "Multiple primaries not allowed by config",
[-SS_NO_UP_TO_DATE_DISK] = "Refusing to be Primary without at least one UpToDate disk",
[-SS_BOTH_INCONSISTENT] = "Refusing to be inconsistent on both nodes",
[-SS_SYNCING_DISKLESS] = "Refusing to be syncing and diskless",
[-SS_NO_LOCAL_DISK] = "Can not resync without local disk",
[-SS_NO_REMOTE_DISK] = "Can not resync without remote disk",
[-SS_CONNECTED_OUTDATES] = "Refusing to be Outdated while Connected",
[-SS_PRIMARY_NOP] = "Refusing to be Primary while peer is not outdated",
[-SS_RESYNC_RUNNING] = "Can not start OV/resync since it is already active",
[-SS_ALREADY_STANDALONE] = "Can not disconnect a StandAlone device",
[-SS_CW_FAILED_BY_PEER] = "State changed was refused by peer node",
[-SS_CW_FAILED_BY_PEER] = "State change was refused by peer node",
[-SS_IS_DISKLESS] = "Device is diskless, the requested operation requires a disk",
[-SS_DEVICE_IN_USE] = "Device is held open by someone",
[-SS_NO_NET_CONFIG] = "Have no net/connection configuration",
......
......@@ -71,7 +71,7 @@ enum dbg_print_flags {
};
/* Macro stuff */
STATIC char *nl_packet_name(int packet_type)
static char *nl_packet_name(int packet_type)
{
/* Generate packet type strings */
#define NL_PACKET(name, number, fields) \
......@@ -371,7 +371,7 @@ static void probe_drbd_resync(struct drbd_conf *mdev, int level, const char *fmt
static void probe_drbd_bio(struct drbd_conf *mdev, const char *pfx, struct bio *bio, int complete,
struct drbd_request *r)
{
#ifdef CONFIG_LBD
#if defined(CONFIG_LBDAF) || defined(CONFIG_LBD)
#define SECTOR_FORMAT "%Lx"
#else
#define SECTOR_FORMAT "%lx"
......@@ -387,7 +387,7 @@ static void probe_drbd_bio(struct drbd_conf *mdev, const char *pfx, struct bio *
const int rw = bio->bi_rw;
const int biorw = (rw & (RW_MASK|RWA_MASK));
const int biobarrier = (rw & (1<<BIO_RW_BARRIER));
const int biosync = (rw & ((1<<BIO_RW_UNPLUG) | (1<<BIO_RW_SYNCIO)));
if (!is_mdev_trace(mdev, TRACE_LVL_ALWAYS))
return;
......@@ -504,7 +504,7 @@ do { \
} \
} while (0)
STATIC char *dump_st(char *p, int len, union drbd_state mask, union drbd_state val)
static char *dump_st(char *p, int len, union drbd_state mask, union drbd_state val)
{
char *op = p;
*p = '\0';
......@@ -531,7 +531,7 @@ do { \
} \
} while (0)
STATIC char *_dump_block_id(u64 block_id, char *buff)
static char *_dump_block_id(u64 block_id, char *buff)
{
if (is_syncer_block_id(block_id))
strcpy(buff, "SyncerId");
......
......@@ -26,12 +26,11 @@
#include <linux/autoconf.h>
#include <linux/module.h>
#include <linux/version.h>
#include <linux/drbd.h>
#include <linux/sched.h>
#include <linux/smp_lock.h>
#include <linux/wait.h>
#include <linux/mm.h>
#include <linux/drbd_config.h>
#include <linux/memcontrol.h>
#include <linux/mm_inline.h>
#include <linux/slab.h>
......@@ -40,14 +39,13 @@
#include <linux/string.h>
#include <linux/scatterlist.h>
#include <linux/drbd.h>
#include "drbd_int.h"
#include "drbd_req.h"
#include "drbd_tracing.h"
#define SLEEP_TIME (HZ/10)
STATIC int w_make_ov_request(struct drbd_conf *mdev, struct drbd_work *w, int cancel);
static int w_make_ov_request(struct drbd_conf *mdev, struct drbd_work *w, int cancel);
......@@ -293,7 +291,7 @@ int w_resync_inactive(struct drbd_conf *mdev, struct drbd_work *w, int cancel)
return 1; /* Simply ignore this! */
}
STATIC void drbd_csum(struct drbd_conf *mdev, struct crypto_hash *tfm, struct bio *bio, void *digest)
void drbd_csum(struct drbd_conf *mdev, struct crypto_hash *tfm, struct bio *bio, void *digest)
{
struct hash_desc desc;
struct scatterlist sg;
......@@ -313,7 +311,7 @@ STATIC void drbd_csum(struct drbd_conf *mdev, struct crypto_hash *tfm, struct bi
crypto_hash_final(&desc, digest);
}
STATIC int w_e_send_csum(struct drbd_conf *mdev, struct drbd_work *w, int cancel)
static int w_e_send_csum(struct drbd_conf *mdev, struct drbd_work *w, int cancel)
{
struct drbd_epoch_entry *e = (struct drbd_epoch_entry *)w;
int digest_size;
......@@ -329,7 +327,7 @@ STATIC int w_e_send_csum(struct drbd_conf *mdev, struct drbd_work *w, int cancel
if (likely(drbd_bio_uptodate(e->private_bio))) {
digest_size = crypto_hash_digestsize(mdev->csums_tfm);
digest = kmalloc(digest_size, GFP_KERNEL);
digest = kmalloc(digest_size, GFP_NOIO);
if (digest) {
drbd_csum(mdev, mdev->csums_tfm, e->private_bio, digest);
......@@ -359,7 +357,7 @@ STATIC int w_e_send_csum(struct drbd_conf *mdev, struct drbd_work *w, int cancel
#define GFP_TRY (__GFP_HIGHMEM | __GFP_NOWARN)
STATIC int read_for_csum(struct drbd_conf *mdev, sector_t sector, int size)
static int read_for_csum(struct drbd_conf *mdev, sector_t sector, int size)
{
struct drbd_epoch_entry *e;
......@@ -421,9 +419,9 @@ int w_make_resync_request(struct drbd_conf *mdev,
unsigned long bit;
sector_t sector;
const sector_t capacity = drbd_get_capacity(mdev->this_bdev);
int max_segment_size = mdev->rq_queue->max_segment_size;
int number, i, size;
int align;
int max_segment_size = queue_max_segment_size(mdev->rq_queue);
int number, i, size, pe, mx;
int align, queued, sndbuf;
if (unlikely(cancel))
return 1;
......@@ -446,15 +444,40 @@ int w_make_resync_request(struct drbd_conf *mdev,
mdev->resync_work.cb = w_resync_inactive;
return 1;
}
/* All goto requeues have to happen after this block: get_ldev() */
number = SLEEP_TIME*mdev->sync_conf.rate / ((BM_BLOCK_SIZE/1024)*HZ);
number = SLEEP_TIME * mdev->sync_conf.rate / ((BM_BLOCK_SIZE/1024)*HZ);
pe = atomic_read(&mdev->rs_pending_cnt);
if (atomic_read(&mdev->rs_pending_cnt) > number)
goto requeue;
number -= atomic_read(&mdev->rs_pending_cnt);
mutex_lock(&mdev->data.mutex);
if (mdev->data.socket)
mx = mdev->data.socket->sk->sk_rcvbuf / sizeof(struct p_block_req);
else
mx = 1;
mutex_unlock(&mdev->data.mutex);
/* For resync rates >160MB/sec, allow more pending RS requests */
if (number > mx)
mx = number;
/* Limit the number of pending RS requests to no more than the peer's receive buffer */
if ((pe + number) > mx) {
number = mx - pe;
}
for (i = 0; i < number; i++) {
/* Stop generating RS requests, when half of the sendbuffer is filled */
mutex_lock(&mdev->data.mutex);
if (mdev->data.socket) {
queued = mdev->data.socket->sk->sk_wmem_queued;
sndbuf = mdev->data.socket->sk->sk_sndbuf;
} else {
queued = 1;
sndbuf = 0;
}
mutex_unlock(&mdev->data.mutex);
if (queued > sndbuf / 2)
goto requeue;
next_sector:
size = BM_BLOCK_SIZE;
bit = drbd_bm_find_next(mdev, mdev->bm_resync_fo);
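With made-up numbers, the pacing introduced above works out as follows (SLEEP_TIME is HZ/10 and BM_BLOCK_SIZE is 4 KiB, as in the source):

#include <stdio.h>

int main(void)
{
	const int HZ = 1000, SLEEP_TIME = HZ / 10;
	int rate = 10000;	/* sync_conf.rate in KiB/s (hypothetical) */
	int pe = 100, mx = 200;	/* pending requests, peer buffer capacity */

	/* 0.1 s * 10000 KiB/s / 4 KiB = 250 new requests per wakeup */
	int number = SLEEP_TIME * rate / ((4096 / 1024) * HZ);
	if (pe + number > mx)
		number = mx - pe;	/* clamp to what the peer can hold */
	printf("requests this cycle: %d\n", number);	/* 100 */
	return 0;
}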
......@@ -589,6 +612,11 @@ int w_make_ov_request(struct drbd_conf *mdev, struct drbd_work *w, int cancel)
sector = mdev->ov_position;
for (i = 0; i < number; i++) {
if (sector >= capacity) {
mdev->resync_work.cb = w_resync_inactive;
return 1;
}
size = BM_BLOCK_SIZE;
if (drbd_try_rs_begin_io(mdev, sector)) {
......@@ -605,11 +633,6 @@ int w_make_ov_request(struct drbd_conf *mdev, struct drbd_work *w, int cancel)
return 0;
}
sector += BM_SECT_PER_BIT;
if (sector >= capacity) {
mdev->resync_work.cb = w_resync_inactive;
return 1;
}
}
mdev->ov_position = sector;
......@@ -628,7 +651,7 @@ int w_ov_finished(struct drbd_conf *mdev, struct drbd_work *w, int cancel)
return 1;
}
STATIC int w_resync_finished(struct drbd_conf *mdev, struct drbd_work *w, int cancel)
static int w_resync_finished(struct drbd_conf *mdev, struct drbd_work *w, int cancel)
{
kfree(w);
......@@ -766,6 +789,7 @@ out:
mdev->rs_total = 0;
mdev->rs_failed = 0;
mdev->rs_paused = 0;
mdev->ov_start_sector = 0;
if (test_and_clear_bit(WRITE_BM_AFTER_RESYNC, &mdev->flags)) {
dev_warn(DEV, "Writing the whole bitmap, due to failed kmalloc\n");
......@@ -911,7 +935,7 @@ int w_e_end_csum_rs_req(struct drbd_conf *mdev, struct drbd_work *w, int cancel)
if (mdev->csums_tfm) {
digest_size = crypto_hash_digestsize(mdev->csums_tfm);
D_ASSERT(digest_size == di->digest_size);
digest = kmalloc(digest_size, GFP_KERNEL);
digest = kmalloc(digest_size, GFP_NOIO);
}
if (digest) {
drbd_csum(mdev, mdev->csums_tfm, e->private_bio, digest);
......@@ -967,13 +991,15 @@ int w_e_end_ov_req(struct drbd_conf *mdev, struct drbd_work *w, int cancel)
goto out;
digest_size = crypto_hash_digestsize(mdev->verify_tfm);
digest = kmalloc(digest_size, GFP_KERNEL);
/* FIXME if this allocation fails, online verify will not terminate! */
digest = kmalloc(digest_size, GFP_NOIO);
if (digest) {
drbd_csum(mdev, mdev->verify_tfm, e->private_bio, digest);
inc_rs_pending(mdev);
ok = drbd_send_drequest_csum(mdev, e->sector, e->size,
digest, digest_size, P_OV_REPLY);
if (ok)
inc_rs_pending(mdev);
if (!ok)
dec_rs_pending(mdev);
kfree(digest);
}
......@@ -1021,7 +1047,7 @@ int w_e_end_ov_reply(struct drbd_conf *mdev, struct drbd_work *w, int cancel)
if (likely(drbd_bio_uptodate(e->private_bio))) {
digest_size = crypto_hash_digestsize(mdev->verify_tfm);
digest = kmalloc(digest_size, GFP_KERNEL);
digest = kmalloc(digest_size, GFP_NOIO);
if (digest) {
drbd_csum(mdev, mdev->verify_tfm, e->private_bio, digest);
......@@ -1157,7 +1183,7 @@ int w_send_read_req(struct drbd_conf *mdev, struct drbd_work *w, int cancel)
return ok;
}
STATIC int _drbd_may_sync_now(struct drbd_conf *mdev)
static int _drbd_may_sync_now(struct drbd_conf *mdev)
{
struct drbd_conf *odev = mdev;
......@@ -1180,7 +1206,7 @@ STATIC int _drbd_may_sync_now(struct drbd_conf *mdev)
*
* Called from process context only (admin command and after_state_ch).
*/
STATIC int _drbd_pause_after(struct drbd_conf *mdev)
static int _drbd_pause_after(struct drbd_conf *mdev)
{
struct drbd_conf *odev;
int i, rv = 0;
......@@ -1205,7 +1231,7 @@ STATIC int _drbd_pause_after(struct drbd_conf *mdev)
*
* Called from process context only (admin command and worker).
*/
STATIC int _drbd_resume_next(struct drbd_conf *mdev)
static int _drbd_resume_next(struct drbd_conf *mdev)
{
struct drbd_conf *odev;
int i, rv = 0;
......@@ -1240,19 +1266,46 @@ void suspend_other_sg(struct drbd_conf *mdev)
write_unlock_irq(&global_state_lock);
}
void drbd_alter_sa(struct drbd_conf *mdev, int na)
static int sync_after_error(struct drbd_conf *mdev, int o_minor)
{
int changes;
struct drbd_conf *odev;
write_lock_irq(&global_state_lock);
mdev->sync_conf.after = na;
if (o_minor == -1)
return NO_ERROR;
if (o_minor < -1 || minor_to_mdev(o_minor) == NULL)
return ERR_SYNC_AFTER;
/* check for loops */
odev = minor_to_mdev(o_minor);
while (1) {
if (odev == mdev)
return ERR_SYNC_AFTER_CYCLE;
do {
changes = _drbd_pause_after(mdev);
changes |= _drbd_resume_next(mdev);
} while (changes);
/* dependency chain ends here, no cycles. */
if (odev->sync_conf.after == -1)
return NO_ERROR;
/* follow the dependency chain */
odev = minor_to_mdev(odev->sync_conf.after);
}
}
int drbd_alter_sa(struct drbd_conf *mdev, int na)
{
int changes;
int retcode;
write_lock_irq(&global_state_lock);
retcode = sync_after_error(mdev, na);
if (retcode == NO_ERROR) {
mdev->sync_conf.after = na;
do {
changes = _drbd_pause_after(mdev);
changes |= _drbd_resume_next(mdev);
} while (changes);
}
write_unlock_irq(&global_state_lock);
return retcode;
}
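The cycle check simply walks the sync-after chain until it either terminates or returns to the starting device. A standalone model (minors and the dependency array are hypothetical):

#include <stdio.h>

#define N 4
static int sync_after[N] = { 1, 2, -1, -1 };	/* 0 -> 1 -> 2 -> end */

static int would_cycle(int self, int proposed)
{
	int o = proposed;

	while (o != -1) {
		if (o == self)
			return 1;	/* ERR_SYNC_AFTER_CYCLE */
		o = sync_after[o];
	}
	return 0;			/* chain terminates: OK */
}

int main(void)
{
	printf("%d\n", would_cycle(3, 0));	/* 0: no cycle */
	sync_after[2] = 3;			/* 0 -> 1 -> 2 -> 3 */
	printf("%d\n", would_cycle(3, 0));	/* 1: reaches itself */
	return 0;
}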
/**
......@@ -1268,6 +1321,11 @@ void drbd_start_resync(struct drbd_conf *mdev, enum drbd_conns side)
union drbd_state ns;
int r;
if (mdev->state.conn >= C_SYNC_SOURCE) {
dev_err(DEV, "Resync already running!\n");
return;
}
trace_drbd_resync(mdev, TRACE_LVL_SUMMARY, "Resync starting: side=%s\n",
side == C_SYNC_TARGET ? "SyncTarget" : "SyncSource");
......
......@@ -7,11 +7,6 @@
/* see get_sb_bdev and bd_claim */
extern char *drbd_sec_holder;
static inline sector_t drbd_get_hardsect_size(struct block_device *bdev)
{
return bdev->bd_disk->queue->hardsect_size;
}
/* sets the number of 512 byte sectors of our virtual device */
static inline void drbd_set_my_capacity(struct drbd_conf *mdev,
sector_t size)
......
......@@ -26,7 +26,6 @@
#ifndef DRBD_H
#define DRBD_H
#include <linux/connector.h>
#include <asm/types.h>
#ifdef __KERNEL__
......@@ -53,6 +52,13 @@
#endif
extern const char *drbd_buildtag(void);
#define REL_VERSION "8.3.2rc2"
#define API_VERSION 88
#define PRO_VERSION_MIN 86
#define PRO_VERSION_MAX 90
enum drbd_io_error_p {
EP_PASS_ON, /* FIXME should this better be named "Ignore"? */
EP_CALL_HELPER,
......@@ -171,8 +177,8 @@ enum drbd_conns {
C_WF_CONNECTION,
C_WF_REPORT_PARAMS, /* we have a socket */
C_CONNECTED, /* we have introduced each other */
C_STARTING_SYNC_S, /* starting full sync by IOCTL. */
C_STARTING_SYNC_T, /* stariing full sync by IOCTL. */
C_STARTING_SYNC_S, /* starting full sync by admin request. */
C_STARTING_SYNC_T, /* starting full sync by admin request. */
C_WF_BITMAP_S,
C_WF_BITMAP_T,
C_WF_SYNC_UUID,
......@@ -249,8 +255,8 @@ enum drbd_state_ret_codes {
SS_UNKNOWN_ERROR = 0, /* Used to sleep longer in _drbd_request_state */
SS_TWO_PRIMARIES = -1,
SS_NO_UP_TO_DATE_DISK = -2,
SS_BOTH_INCONSISTENT = -4,
SS_SYNCING_DISKLESS = -5,
SS_NO_LOCAL_DISK = -4,
SS_NO_REMOTE_DISK = -5,
SS_CONNECTED_OUTDATES = -6,
SS_PRIMARY_NOP = -7,
SS_RESYNC_RUNNING = -8,
......
/*
drbd_config.h
DRBD's compile time configuration.
drbd is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2, or (at your option)
any later version.
drbd is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with drbd; see the file COPYING. If not, write to
the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
*/
#ifndef DRBD_CONFIG_H
#define DRBD_CONFIG_H
extern const char *drbd_buildtag(void);
#define REL_VERSION "8.3.1"
#define API_VERSION 88
#define PRO_VERSION_MIN 86
#define PRO_VERSION_MAX 90
#ifndef __CHECKER__ /* for a sparse run, we need all STATICs */
#define DBG_ALL_SYMBOLS /* no static functs, improves quality of OOPS traces */
#endif
/* Enable fault insertion code */
#define DRBD_ENABLE_FAULTS
#endif
......@@ -72,6 +72,10 @@
#define DRBD_SNDBUF_SIZE_MAX (10<<20)
#define DRBD_SNDBUF_SIZE_DEF (2*65535)
#define DRBD_RCVBUF_SIZE_MIN 0
#define DRBD_RCVBUF_SIZE_MAX (10<<20)
#define DRBD_RCVBUF_SIZE_DEF (2*65535)
/* @4k PageSize -> 128kB - 512MB */
#define DRBD_MAX_BUFFERS_MIN 32
#define DRBD_MAX_BUFFERS_MAX 131072
......
......@@ -55,6 +55,7 @@ NL_PACKET(net_conf, 5,
NL_INTEGER( 26, T_MAY_IGNORE, after_sb_2p)
NL_INTEGER( 39, T_MAY_IGNORE, rr_conflict)
NL_INTEGER( 40, T_MAY_IGNORE, ping_timeo)
NL_INTEGER( 67, T_MAY_IGNORE, rcvbuf_size)
/* 59 addr_family was available in GIT, never released */
NL_BIT( 60, T_MANDATORY, mind_af)
NL_BIT( 27, T_MAY_IGNORE, want_lose)
......@@ -77,7 +78,7 @@ NL_PACKET(syncer_conf, 8,
NL_STRING( 52, T_MAY_IGNORE, verify_alg, SHARED_SECRET_MAX)
NL_STRING( 51, T_MAY_IGNORE, cpu_mask, 32)
NL_STRING( 64, T_MAY_IGNORE, csums_alg, SHARED_SECRET_MAX)
NL_BIT( 65, T_MAY_IGNORE, use_rle_encoding)
NL_BIT( 65, T_MAY_IGNORE, use_rle)
)
NL_PACKET(invalidate, 9, )
......@@ -121,6 +122,7 @@ NL_PACKET(dump_ee, 24,
)
NL_PACKET(start_ov, 25,
NL_INT64( 66, T_MAY_IGNORE, start_sector)
)
NL_PACKET(new_c_uuid, 26,
......
......@@ -27,6 +27,10 @@
#define LRU_CACHE_H
#include <linux/list.h>
#include <linux/slab.h>
#include <linux/bitops.h>
#include <linux/string.h> /* for memset */
#include <linux/seq_file.h>
/*
This header file (and its .c file; kernel-doc of functions see there)
......@@ -142,22 +146,29 @@ write intent log information, three of which are mentioned here.
* an element is said to be "in the active set",
* if either on "in_use" or "lru", i.e. lc_number != LC_FREE.
*
* DRBD currently only uses 61 elements on the resync lru_cache (total memory
* usage 2 pages), and up to 3833 elements on the act_log lru_cache, totalling
* ~215 kB for 64bit architechture, ~53 pages.
* DRBD currently (May 2009) only uses 61 elements on the resync lru_cache
* (total memory usage 2 pages), and up to 3833 elements on the act_log
* lru_cache, totalling ~215 kB for 64bit architecture, ~53 pages.
*
* We usually do not actually free these objects again, but only "recycle"
* them, as the change "index: -old_label, +LC_FREE" would need a transaction
* as well. Which also means that using a kmem_cache or even mempool to
* allocate the objects from wastes some resources. But it would avoid high
* order page allocations in kmalloc, so we may change to a kmem_cache backed
* allocation of the elements in the near future.
* as well. Which also means that using a kmem_cache to allocate the objects
* from wastes some resources.
* But it avoids high order page allocations in kmalloc.
*/
struct lc_element {
struct hlist_node colision;
struct list_head list; /* LRU list or free list */
unsigned int refcnt;
unsigned int lc_number;
unsigned refcnt;
/* back "pointer" into ts_cache->element[index],
* for paranoia, and for "ts_element_to_index" */
unsigned lc_index;
/* if we want to track a larger set of objects,
* it needs to become arch independent u64 */
unsigned lc_number;
/* special label when on free list */
#define LC_FREE (~0U)
};
struct lru_cache {
......@@ -166,16 +177,25 @@ struct lru_cache {
struct list_head free;
struct list_head in_use;
/* size of tracked objects */
/* the pre-created kmem cache to allocate the objects from */
struct kmem_cache *lc_cache;
/* size of tracked objects, used to memset(,0,) them in lc_reset */
size_t element_size;
/* offset of struct lc_element member in the tracked object */
size_t element_off;
/* number of elements (indices) */
unsigned int nr_elements;
/* Arbitrary limit on maximum tracked objects. Practical limit is much
* lower due to allocation failures, probably. For typical use cases,
* nr_elements should be a few thousand at most.
* This also limits the maximum value of ts_element.ts_index, allowing the
* 8 high bits of .ts_index to be overloaded with flags in the future. */
#define LC_MAX_ACTIVE (1<<24)
/* statistics */
unsigned int used;
unsigned used; /* number of elements currently on in_use list */
unsigned long hits, misses, starving, dirty, changed;
/* see below: flag-bits for lru_cache */
......@@ -190,8 +210,9 @@ struct lru_cache {
void *lc_private;
const char *name;
struct hlist_head slot[0];
/* hash colision chains here, then element storage. */
/* nr_elements there */
struct hlist_head *lc_slot;
struct lc_element **lc_element;
};
......@@ -217,8 +238,8 @@ enum {
#define LC_DIRTY (1<<__LC_DIRTY)
#define LC_STARVING (1<<__LC_STARVING)
extern struct lru_cache *lc_create(const char *name, unsigned int e_count,
size_t e_size, size_t e_off);
extern struct lru_cache *lc_create(const char *name, struct kmem_cache *cache,
unsigned e_count, size_t e_size, size_t e_off);
extern void lc_reset(struct lru_cache *lc);
extern void lc_destroy(struct lru_cache *lc);
extern void lc_set(struct lru_cache *lc, unsigned int enr, int index);
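A hedged sketch of a caller using the new lc_create() signature, where the kmem_cache for the tracked objects is now supplied by the caller; the struct and names below are illustrative, not taken from DRBD:

#include <linux/lru_cache.h>
#include <linux/slab.h>
#include <linux/stddef.h>

struct my_extent {
	struct lc_element lce;	/* embedded; recovered via lc_entry() */
	unsigned long flags;
};

static struct lru_cache *make_my_cache(struct kmem_cache **slab_out)
{
	struct kmem_cache *slab;

	slab = kmem_cache_create("my_extent", sizeof(struct my_extent),
				 0, 0, NULL);
	if (!slab)
		return NULL;
	*slab_out = slab;
	return lc_create("my-lru", slab, 64, sizeof(struct my_extent),
			 offsetof(struct my_extent, lce));
}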
......@@ -236,15 +257,22 @@ extern size_t lc_seq_printf_stats(struct seq_file *seq, struct lru_cache *lc);
extern void lc_seq_dump_details(struct seq_file *seq, struct lru_cache *lc, char *utext,
void (*detail) (struct seq_file *, struct lc_element *));
/* This can be used to stop lc_get from changing the set of active elements.
* Note that the reference counts and order on the lru list may still change.
* returns true if we aquired the lock.
/**
* lc_try_lock - can be used to stop lc_get() from changing the tracked set
* @lc: the lru cache to operate on
*
* Note that the reference counts and order on the active and lru lists may
* still change. Returns true if we acquired the lock.
*/
static inline int lc_try_lock(struct lru_cache *lc)
{
return !test_and_set_bit(__LC_DIRTY, &lc->flags);
}
/**
* lc_unlock - unlock @lc, allow lc_get() to change the set again
* @lc: the lru cache to operate on
*/
static inline void lc_unlock(struct lru_cache *lc)
{
clear_bit(__LC_DIRTY, &lc->flags);
......@@ -257,29 +285,10 @@ static inline int lc_is_used(struct lru_cache *lc, unsigned int enr)
return e && e->refcnt;
}
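Typical use of the pair, as a sketch: freeze the tracked set, work on a stable mapping, then allow changes again.

#include <linux/lru_cache.h>

static void with_stable_set(struct lru_cache *lc,
			    void (*work)(struct lru_cache *))
{
	if (!lc_try_lock(lc))
		return;		/* set is busy; caller may retry later */
	work(lc);
	lc_unlock(lc);
}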
#define LC_FREE (-1U)
#define lc_entry(ptr, type, member) \
container_of(ptr, type, member)
static inline struct lc_element *
lc_element_by_index(struct lru_cache *lc, unsigned int i)
{
BUG_ON(i >= lc->nr_elements);
return (struct lc_element *)(
((char *)(lc->slot + lc->nr_elements)) +
i * lc->element_size
+ lc->element_off);
}
static inline size_t lc_index_of(struct lru_cache *lc, struct lc_element *e)
{
size_t i = ((char *)(e) - lc->element_off
- ((char *)(lc->slot + lc->nr_elements)))
/ lc->element_size;
BUG_ON(i >= lc->nr_elements);
BUG_ON(e != lc_element_by_index(lc, i));
return i;
}
extern struct lc_element *lc_element_by_index(struct lru_cache *lc, unsigned i);
extern unsigned int lc_index_of(struct lru_cache *lc, struct lc_element *e);
#endif