Commit 65b0b44d authored by Philipp Reisner

Tracking DRBD mainline (and minor cleanups)

   * drbd-8.3: (134 commits)
Missing pieces of the unaligned memory access stuff.
      possible fix for XEN crashes on disconnect
      fix regression: initial sync target hung in WFBitMapT
      fix a comment: there are no more ioctls.
      ...

Removed compat code from lru_cache.h
All STATIC -> static
DRBD_ENABLE_FAULTS -> CONFIG_DRBD_FAULT_INJECTION

    * drbd-8.3:
Fixed some errors/warnings when compiled without DBG_ALL_SYMBOLS (i.e. STATIC = static)
      Fixed a regression introduced with fb51e2eb1fac83839231499333bf683629388484

No longer include drbd_config.h directly, include drbd.h instead
Got rid of drbd_config.h
Support lru_cache as module
Removing the drbd_buildtag.c file

    * drbd-8.3:
Fixes for architectures that do not support unaligned memory accesses
      fix reading of the AL ring buffer
      sync handshake: fix detection of "unrelated" data - it was detected as "regular" split-brain

    * drbd-8.3:
      Preparing 8.3.2rc2
      compat: 2.6.31 -- q->limits.* and accessor functions
Signed-off-by: Philipp Reisner <philipp.reisner@linbit.com>
Signed-off-by: Lars Ellenberg <lars.ellenberg@linbit.com>
parent b8e44af9
......@@ -46,3 +46,35 @@ config DRBD_TRACE
Say Y here if you want to be able to trace various events in DRBD.
If unsure, say N.
config DRBD_FAULT_INJECTION
bool "DRBD fault injection"
depends on BLK_DEV_DRBD
help
Say Y here if you want to simulate IO errors, in order to test DRBD's
behavior.
The actual simulation of IO errors is done by writing 3 values to
/sys/module/drbd/parameters/
enable_faults: bitmask of...
1 meta data write
2 read
4 resync data write
8 read
16 data write
32 data read
64 read ahead
128 kmalloc of bitmap
256 allocation of EE (epoch_entries)
fault_devs: bitmask of minor numbers
fault_rate: frequency in percent
Example: Simulate data write errors on /dev/drbd0 with a probability of 5%.
echo 16 > /sys/module/drbd/parameters/enable_faults
echo 1 > /sys/module/drbd/parameters/fault_devs
echo 5 > /sys/module/drbd/parameters/fault_rate
If unsure, say N.
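Taken together, the three parameters form a per-type, per-device, probabilistic gate. A standalone sketch of that logic (helper name and policy are illustrative, not DRBD's in-tree implementation):

#include <stdio.h>
#include <stdlib.h>

/* Hypothetical model: inject a fault only when the fault type's bit
 * is set in enable_faults, the device's minor bit is set in
 * fault_devs, and a percentage roll falls below fault_rate. */
static unsigned int enable_faults = 16;	/* bit 4: data write */
static unsigned int fault_devs = 1;	/* minor 0 */
static unsigned int fault_rate = 5;	/* percent */

static int should_inject(unsigned int fault_type, unsigned int minor)
{
	if (!(enable_faults & (1u << fault_type)))
		return 0;
	if (!(fault_devs & (1u << minor)))
		return 0;
	return (unsigned int)(rand() % 100) < fault_rate;
}

int main(void)
{
	/* fault type 4 == data write (bit value 16 in the table above) */
	printf("inject: %d\n", should_inject(4, 0));
	return 0;
}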
drbd-y := drbd_buildtag.o drbd_bitmap.o drbd_proc.o
drbd-y := drbd_bitmap.o drbd_proc.o
drbd-y += drbd_worker.o drbd_receiver.o drbd_req.o drbd_actlog.o
drbd-y += drbd_main.o drbd_strings.o drbd_nl.o
......
......@@ -77,7 +77,7 @@ void trace_drbd_resync(struct drbd_conf *mdev, int level, const char *fmt, ...)
va_end(ap);
}
STATIC int _drbd_md_sync_page_io(struct drbd_conf *mdev,
static int _drbd_md_sync_page_io(struct drbd_conf *mdev,
struct drbd_backing_dev *bdev,
struct page *page, sector_t sector,
int rw, int size)
......@@ -133,7 +133,7 @@ STATIC int _drbd_md_sync_page_io(struct drbd_conf *mdev,
int drbd_md_sync_page_io(struct drbd_conf *mdev, struct drbd_backing_dev *bdev,
sector_t sector, int rw)
{
int hardsect_size, mask, ok;
int logical_block_size, mask, ok;
int offset = 0;
struct page *iop = mdev->md_io_page;
......@@ -141,15 +141,15 @@ int drbd_md_sync_page_io(struct drbd_conf *mdev, struct drbd_backing_dev *bdev,
BUG_ON(!bdev->md_bdev);
hardsect_size = drbd_get_hardsect_size(bdev->md_bdev);
if (hardsect_size == 0)
hardsect_size = MD_SECTOR_SIZE;
logical_block_size = bdev_logical_block_size(bdev->md_bdev);
if (logical_block_size == 0)
logical_block_size = MD_SECTOR_SIZE;
/* in case hardsect_size != 512 [ s390 only? ] */
if (hardsect_size != MD_SECTOR_SIZE) {
mask = (hardsect_size / MD_SECTOR_SIZE) - 1;
/* in case logical_block_size != 512 [ s390 only? ] */
if (logical_block_size != MD_SECTOR_SIZE) {
mask = (logical_block_size / MD_SECTOR_SIZE) - 1;
D_ASSERT(mask == 1 || mask == 3 || mask == 7);
D_ASSERT(hardsect_size == (mask+1) * MD_SECTOR_SIZE);
D_ASSERT(logical_block_size == (mask+1) * MD_SECTOR_SIZE);
offset = sector & mask;
sector = sector & ~mask;
iop = mdev->md_io_tmpp;
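With a hypothetical 4096-byte logical block size, the mask/offset arithmetic above works out as in this standalone sketch:

#include <stdio.h>

int main(void)
{
	const unsigned long long MD_SECTOR_SIZE = 512;
	unsigned long long logical_block_size = 4096;	/* e.g. s390 DASD */
	unsigned long long sector = 13;			/* in 512-byte units */

	/* mask selects the position within one logical block */
	unsigned long long mask = logical_block_size / MD_SECTOR_SIZE - 1;
	printf("mask=%llu offset=%llu aligned=%llu\n",
	       mask, sector & mask, sector & ~mask);	/* 7, 5, 8 */
	return 0;
}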
......@@ -161,11 +161,11 @@ int drbd_md_sync_page_io(struct drbd_conf *mdev, struct drbd_backing_dev *bdev,
void *hp = page_address(mdev->md_io_tmpp);
ok = _drbd_md_sync_page_io(mdev, bdev, iop, sector,
READ, hardsect_size);
READ, logical_block_size);
if (unlikely(!ok)) {
dev_err(DEV, "drbd_md_sync_page_io(,%llus,"
"READ [hardsect_size!=512]) failed!\n",
"READ [logical_block_size!=512]) failed!\n",
(unsigned long long)sector);
return 0;
}
......@@ -180,14 +180,14 @@ int drbd_md_sync_page_io(struct drbd_conf *mdev, struct drbd_backing_dev *bdev,
current->comm, current->pid, __func__,
(unsigned long long)sector, (rw & WRITE) ? "WRITE" : "READ");
ok = _drbd_md_sync_page_io(mdev, bdev, iop, sector, rw, hardsect_size);
ok = _drbd_md_sync_page_io(mdev, bdev, iop, sector, rw, logical_block_size);
if (unlikely(!ok)) {
dev_err(DEV, "drbd_md_sync_page_io(,%llus,%s) failed!\n",
(unsigned long long)sector, (rw & WRITE) ? "WRITE" : "READ");
return 0;
}
if (hardsect_size != MD_SECTOR_SIZE && !(rw & WRITE)) {
if (logical_block_size != MD_SECTOR_SIZE && !(rw & WRITE)) {
void *p = page_address(mdev->md_io_page);
void *hp = page_address(mdev->md_io_tmpp);
......@@ -378,7 +378,7 @@ w_al_write_transaction(struct drbd_conf *mdev, struct drbd_work *w, int unused)
*
* Returns -1 on IO error, 0 on checksum error and 1 upon success.
*/
STATIC int drbd_al_read_tr(struct drbd_conf *mdev,
static int drbd_al_read_tr(struct drbd_conf *mdev,
struct drbd_backing_dev *bdev,
struct al_transaction *b,
int index)
......@@ -416,14 +416,14 @@ int drbd_al_read_log(struct drbd_conf *mdev, struct drbd_backing_dev *bdev)
int i;
int rv;
int mx;
int cnr;
int active_extents = 0;
int transactions = 0;
int overflow = 0;
int from = -1;
int to = -1;
u32 from_tnr = -1;
int found_valid = 0;
int from = 0;
int to = 0;
u32 from_tnr = 0;
u32 to_tnr = 0;
u32 cnr;
mx = div_ceil(mdev->act_log->nr_elements, AL_EXTENTS_PT);
......@@ -444,22 +444,27 @@ int drbd_al_read_log(struct drbd_conf *mdev, struct drbd_backing_dev *bdev)
}
cnr = be32_to_cpu(buffer->tr_number);
if (cnr == -1)
overflow = 1;
if (cnr < from_tnr && !overflow) {
if (++found_valid == 1) {
from = i;
to = i;
from_tnr = cnr;
to_tnr = cnr;
continue;
}
if ((int)cnr - (int)from_tnr < 0) {
D_ASSERT(from_tnr - cnr + i - from == mx+1);
from = i;
from_tnr = cnr;
}
if (cnr > to_tnr) {
if ((int)cnr - (int)to_tnr > 0) {
D_ASSERT(cnr - to_tnr == i - to);
to = i;
to_tnr = cnr;
}
}
if (from == -1 || to == -1) {
if (!found_valid) {
dev_warn(DEV, "No usable activity log found.\n");
mutex_unlock(&mdev->md_io_mutex);
return 1;
}
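The comparisons on the transaction numbers above use signed 32-bit subtraction, which keeps the ordering correct even when tr_number wraps around; a standalone model with made-up values:

#include <stdio.h>
#include <stdint.h>

/* (int)a - (int)b < 0 iff a is "older" than b in modulo-2^32
 * arithmetic, as long as the two values are less than 2^31 apart. */
static int is_older(uint32_t a, uint32_t b)
{
	return (int32_t)(a - b) < 0;
}

int main(void)
{
	printf("%d\n", is_older(5, 10));		/* 1: plainly older */
	printf("%d\n", is_older(4294967294u, 3));	/* 1: older across the wrap */
	printf("%d\n", is_older(3, 4294967294u));	/* 0: newer across the wrap */
	return 0;
}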
......@@ -524,7 +529,7 @@ cancel:
return 1;
}
STATIC void atodb_endio(struct bio *bio, int error)
static void atodb_endio(struct bio *bio, int error)
{
struct drbd_atodb_wait *wc = bio->bi_private;
struct drbd_conf *mdev = wc->mdev;
......@@ -555,7 +560,7 @@ STATIC void atodb_endio(struct bio *bio, int error)
#define S2W(s) ((s)<<(BM_EXT_SHIFT-BM_BLOCK_SHIFT-LN2_BPL))
/* activity log to on disk bitmap -- prepare bio unless that sector
* is already covered by previously prepared bios */
STATIC int atodb_prepare_unless_covered(struct drbd_conf *mdev,
static int atodb_prepare_unless_covered(struct drbd_conf *mdev,
struct bio **bios,
unsigned int enr,
struct drbd_atodb_wait *wc) __must_hold(local)
......@@ -803,7 +808,7 @@ void drbd_al_shrink(struct drbd_conf *mdev)
wake_up(&mdev->al_wait);
}
STATIC int w_update_odbm(struct drbd_conf *mdev, struct drbd_work *w, int unused)
static int w_update_odbm(struct drbd_conf *mdev, struct drbd_work *w, int unused)
{
struct update_odbm_work *udw = (struct update_odbm_work *)w;
......@@ -840,7 +845,7 @@ STATIC int w_update_odbm(struct drbd_conf *mdev, struct drbd_work *w, int unused
*
* TODO will be obsoleted once we have a caching lru of the on disk bitmap
*/
STATIC void drbd_try_clear_on_disk_bm(struct drbd_conf *mdev, sector_t sector,
static void drbd_try_clear_on_disk_bm(struct drbd_conf *mdev, sector_t sector,
int count, int success)
{
struct lc_element *e;
......
......@@ -26,6 +26,7 @@
#include <linux/vmalloc.h>
#include <linux/string.h>
#include <linux/drbd.h>
#include <asm/kmap_types.h>
#include "drbd_int.h"
/* OPAQUE outside this file!
......@@ -150,7 +151,7 @@ void drbd_bm_unlock(struct drbd_conf *mdev)
}
/* word offset to long pointer */
STATIC unsigned long *__bm_map_paddr(struct drbd_bitmap *b, unsigned long offset, const enum km_type km)
static unsigned long *__bm_map_paddr(struct drbd_bitmap *b, unsigned long offset, const enum km_type km)
{
struct page *page;
unsigned long page_nr;
......@@ -197,7 +198,7 @@ void bm_unmap(unsigned long *p_addr)
* to be able to report device specific.
*/
STATIC void bm_free_pages(struct page **pages, unsigned long number)
static void bm_free_pages(struct page **pages, unsigned long number)
{
unsigned long i;
if (!pages)
......@@ -215,7 +216,7 @@ STATIC void bm_free_pages(struct page **pages, unsigned long number)
}
}
STATIC void bm_vk_free(void *ptr, int v)
static void bm_vk_free(void *ptr, int v)
{
if (v)
vfree(ptr);
......@@ -226,7 +227,7 @@ STATIC void bm_vk_free(void *ptr, int v)
/*
* "have" and "want" are NUMBER OF PAGES.
*/
STATIC struct page **bm_realloc_pages(struct drbd_bitmap *b, unsigned long want)
static struct page **bm_realloc_pages(struct drbd_bitmap *b, unsigned long want)
{
struct page **old_pages = b->bm_pages;
struct page **new_pages, *page;
......@@ -239,7 +240,11 @@ STATIC struct page **bm_realloc_pages(struct drbd_bitmap *b, unsigned long want)
if (have == want)
return old_pages;
/* Trying kmalloc first, falling back to vmalloc... */
/* Trying kmalloc first, falling back to vmalloc.
* GFP_KERNEL is ok, as this is done when a lower level disk is
* "attached" to the drbd. Context is receiver thread or cqueue
* thread. As we have no disk yet, we are not in the IO path,
* not even the IO path of the peer. */
bytes = sizeof(struct page *)*want;
new_pages = kmalloc(bytes, GFP_KERNEL);
if (!new_pages) {
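The fallback pattern used here, reduced to a minimal kernel-style sketch (function names are illustrative): prefer physically contiguous kmalloc, fall back to vmalloc, and remember which allocator succeeded so the matching free routine can be used later (cf. bm_vk_free() above).

#include <linux/slab.h>
#include <linux/vmalloc.h>

static void *alloc_ptr_array(size_t n, int *vmalloced)
{
	void *p;

	*vmalloced = 0;
	p = kmalloc(n * sizeof(void *), GFP_KERNEL);
	if (!p) {
		p = vmalloc(n * sizeof(void *));
		*vmalloced = 1;
	}
	return p;
}

static void free_ptr_array(void *p, int vmalloced)
{
	if (vmalloced)
		vfree(p);
	else
		kfree(p);
}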
......@@ -320,7 +325,7 @@ void drbd_bm_cleanup(struct drbd_conf *mdev)
* this masks out the remaining bits.
* Returns the number of bits cleared.
*/
STATIC int bm_clear_surplus(struct drbd_bitmap *b)
static int bm_clear_surplus(struct drbd_bitmap *b)
{
const unsigned long mask = (1UL << (b->bm_bits & (BITS_PER_LONG-1))) - 1;
size_t w = b->bm_bits >> LN2_BPL;
......@@ -343,7 +348,7 @@ STATIC int bm_clear_surplus(struct drbd_bitmap *b)
return cleared;
}
STATIC void bm_set_surplus(struct drbd_bitmap *b)
static void bm_set_surplus(struct drbd_bitmap *b)
{
const unsigned long mask = (1UL << (b->bm_bits & (BITS_PER_LONG-1))) - 1;
size_t w = b->bm_bits >> LN2_BPL;
......@@ -362,7 +367,7 @@ STATIC void bm_set_surplus(struct drbd_bitmap *b)
bm_unmap(p_addr);
}
STATIC unsigned long __bm_count_bits(struct drbd_bitmap *b, const int swap_endian)
static unsigned long __bm_count_bits(struct drbd_bitmap *b, const int swap_endian)
{
unsigned long *p_addr, *bm, offset = 0;
unsigned long bits = 0;
......@@ -420,7 +425,7 @@ void _drbd_bm_recount_bits(struct drbd_conf *mdev, char *file, int line)
}
/* offset and len in long words.*/
STATIC void bm_memset(struct drbd_bitmap *b, size_t offset, int c, size_t len)
static void bm_memset(struct drbd_bitmap *b, size_t offset, int c, size_t len)
{
unsigned long *p_addr, *bm;
size_t do_now, end;
......@@ -752,7 +757,7 @@ static void bm_async_io_complete(struct bio *bio, int error)
bio_put(bio);
}
STATIC void bm_page_io_async(struct drbd_conf *mdev, struct drbd_bitmap *b, int page_nr, int rw) __must_hold(local)
static void bm_page_io_async(struct drbd_conf *mdev, struct drbd_bitmap *b, int page_nr, int rw) __must_hold(local)
{
/* we are process context. we always get a bio */
struct bio *bio = bio_alloc(GFP_KERNEL, 1);
......@@ -790,6 +795,8 @@ void bm_cpu_to_lel(struct drbd_bitmap *b)
* this may be optimized by using
* cpu_to_lel(-1) == -1 and cpu_to_lel(0) == 0;
* the following is still not optimal, but better than nothing */
unsigned int i;
unsigned long *p_addr, *bm;
if (b->bm_set == 0) {
/* no page at all; avoid swap if all is 0 */
i = b->bm_number_of_pages;
......@@ -801,12 +808,10 @@ void bm_cpu_to_lel(struct drbd_bitmap *b)
i = 0;
}
for (; i < b->bm_number_of_pages; i++) {
unsigned long *bm;
/* if you'd want to use kmap_atomic, you'd have to disable irq! */
p_addr = kmap(b->bm_pages[i]);
p_addr = kmap_atomic(b->bm_pages[i], KM_USER0);
for (bm = p_addr; bm < p_addr + PAGE_SIZE/sizeof(long); bm++)
*bm = cpu_to_lel(*bm);
kunmap(p_addr);
kunmap_atomic(p_addr, KM_USER0);
}
}
# endif
......@@ -816,7 +821,7 @@ void bm_cpu_to_lel(struct drbd_bitmap *b)
/*
* bm_rw: read/write the whole bitmap from/to its on disk location.
*/
STATIC int bm_rw(struct drbd_conf *mdev, int rw) __must_hold(local)
static int bm_rw(struct drbd_conf *mdev, int rw) __must_hold(local)
{
struct drbd_bitmap *b = mdev->bitmap;
/* sector_t sector; */
......
/* automatically generated. DO NOT EDIT. */
#include <linux/drbd_config.h>
const char *drbd_buildtag(void)
{
return "GIT-hash: b0abb3832a730d4fbd145013f6f51fc977bba3cc drbd/drbd_int.h"
" build by phil@fat-tyre, 2009-05-15 11:54:26";
}
......@@ -106,22 +106,6 @@ extern char usermode_helper[];
struct drbd_conf;
#ifdef DBG_ALL_SYMBOLS
# define STATIC
#else
# define STATIC static
#endif
/*
* Some Message Macros
*************************/
#define DUMPP(A) dev_err(DEV, #A " = %p in %s:%d\n", (A), __FILE__, __LINE__);
#define DUMPLU(A) dev_err(DEV, #A " = %lu in %s:%d\n", (unsigned long)(A), __FILE__, __LINE__);
#define DUMPLLU(A) dev_err(DEV, #A " = %llu in %s:%d\n", (unsigned long long)(A), __FILE__, __LINE__);
#define DUMPLX(A) dev_err(DEV, #A " = %lx in %s:%d\n", (A), __FILE__, __LINE__);
#define DUMPI(A) dev_err(DEV, #A " = %d in %s:%d\n", (int)(A), __FILE__, __LINE__);
/* to shorten dev_warn(DEV, "msg"); and relatives statements */
#define DEV (disk_to_dev(mdev->vdisk))
......@@ -139,14 +123,14 @@ struct drbd_conf;
/* Defines to control fault insertion */
enum {
DRBD_FAULT_MD_WR = 0, /* meta data write */
DRBD_FAULT_MD_RD, /* read */
DRBD_FAULT_RS_WR, /* resync */
DRBD_FAULT_RS_RD,
DRBD_FAULT_DT_WR, /* data */
DRBD_FAULT_DT_RD,
DRBD_FAULT_DT_RA, /* data read ahead */
DRBD_FAULT_BM_ALLOC, /* bitmap allocation */
DRBD_FAULT_AL_EE, /* alloc ee */
DRBD_FAULT_MD_RD = 1, /* read */
DRBD_FAULT_RS_WR = 2, /* resync */
DRBD_FAULT_RS_RD = 3,
DRBD_FAULT_DT_WR = 4, /* data */
DRBD_FAULT_DT_RD = 5,
DRBD_FAULT_DT_RA = 6, /* data read ahead */
DRBD_FAULT_BM_ALLOC = 7, /* bitmap allocation */
DRBD_FAULT_AL_EE = 8, /* alloc ee */
DRBD_FAULT_MAX,
};
......@@ -332,6 +316,10 @@ static inline void bm_xfer_ctx_bit_to_word_offset(struct bm_xfer_ctx *c)
#endif
}
#ifndef __packed
#define __packed __attribute__((packed))
#endif
/* This is the layout for a packet on the wire.
* The byteorder is the network byte order.
* (except block_id and barrier fields.
......@@ -543,6 +531,7 @@ struct p_compressed_bm {
u8 code[0];
} __packed;
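A minimal illustration of the wire-format convention described above: fixed-width big-endian fields inside a __packed struct, converted with the cpu_to_be32()/cpu_to_be16() helpers at the boundaries. The packet type shown here is hypothetical, not one of DRBD's:

#include <linux/types.h>
#include <asm/byteorder.h>

struct example_pkt {
	__be32 magic;
	__be16 command;
	__be16 length;
} __packed;

static void example_fill(struct example_pkt *p, u16 cmd)
{
	p->magic   = cpu_to_be32(0x12345678);	/* made-up magic */
	p->command = cpu_to_be16(cmd);
	p->length  = cpu_to_be16(sizeof(*p));
}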
/* DCBP: Drbd Compressed Bitmap Packet ... */
static inline enum drbd_bitmap_code
DCBP_get_code(struct p_compressed_bm *p)
{
......@@ -795,6 +784,8 @@ enum {
* but worker thread is still handling the cleanup.
* reconfiguring (nl_disk_conf, nl_net_conf) is disallowed,
* while this is set. */
RESIZE_PENDING, /* Size change detected locally, waiting for the response from
* the peer, if it changed there as well. */
};
struct drbd_bitmap; /* opaque for drbd_conf */
......@@ -946,12 +937,16 @@ struct drbd_conf {
unsigned long rs_mark_time;
/* skipped because csum was equal [unit BM_BLOCK_SIZE] */
unsigned long rs_same_csum;
/* where does the admin want us to start? (sector) */
sector_t ov_start_sector;
/* where are we now? (sector) */
sector_t ov_position;
/* Start sector of out of sync range. */
/* Start sector of out of sync range (to merge printk reporting). */
sector_t ov_last_oos_start;
/* size of out-of-sync range in sectors. */
sector_t ov_last_oos_size;
unsigned long ov_left;
unsigned long ov_left; /* in bits */
struct crypto_hash *csums_tfm;
struct crypto_hash *verify_tfm;
......@@ -991,7 +986,7 @@ struct drbd_conf {
atomic_t pp_in_use;
wait_queue_head_t ee_wait;
struct page *md_io_page; /* one page buffer for md_io */
struct page *md_io_tmpp; /* for hardsect_size != 512 [s390 only?] */
struct page *md_io_tmpp; /* for logical_block_size != 512 */
struct mutex md_io_mutex; /* protects the md_io_buffer */
spinlock_t al_lock;
wait_queue_head_t al_wait;
......@@ -1103,7 +1098,7 @@ extern int drbd_send_protocol(struct drbd_conf *mdev);
extern int drbd_send_uuids(struct drbd_conf *mdev);
extern int drbd_send_uuids_skip_initial_sync(struct drbd_conf *mdev);
extern int drbd_send_sync_uuid(struct drbd_conf *mdev, u64 val);
extern int drbd_send_sizes(struct drbd_conf *mdev);
extern int drbd_send_sizes(struct drbd_conf *mdev, int trigger_reply);
extern int _drbd_send_state(struct drbd_conf *mdev);
extern int drbd_send_state(struct drbd_conf *mdev);
extern int _drbd_send_cmd(struct drbd_conf *mdev, struct socket *sock,
......@@ -1127,8 +1122,6 @@ extern int drbd_send_ack_dp(struct drbd_conf *mdev, enum drbd_packets cmd,
struct p_data *dp);
extern int drbd_send_ack_ex(struct drbd_conf *mdev, enum drbd_packets cmd,
sector_t sector, int blksize, u64 block_id);
extern int _drbd_send_page(struct drbd_conf *mdev, struct page *page,
int offset, size_t size);
extern int drbd_send_block(struct drbd_conf *mdev, enum drbd_packets cmd,
struct drbd_epoch_entry *e);
extern int drbd_send_dblock(struct drbd_conf *mdev, struct drbd_request *req);
......@@ -1348,7 +1341,9 @@ extern int drbd_bm_count_bits(struct drbd_conf *mdev, const unsigned long s, con
/* drbd_main.c */
extern struct kmem_cache *drbd_request_cache;
extern struct kmem_cache *drbd_ee_cache;
extern struct kmem_cache *drbd_ee_cache; /* epoch entries */
extern struct kmem_cache *drbd_bm_ext_cache; /* bitmap extents */
extern struct kmem_cache *drbd_al_ext_cache; /* activity log extents */
extern mempool_t *drbd_request_mempool;
extern mempool_t *drbd_ee_mempool;
......@@ -1388,7 +1383,7 @@ extern int drbd_khelper(struct drbd_conf *mdev, char *cmd);
/* drbd_worker.c */
extern int drbd_worker(struct drbd_thread *thi);
extern void drbd_alter_sa(struct drbd_conf *mdev, int na);
extern int drbd_alter_sa(struct drbd_conf *mdev, int na);
extern void drbd_start_resync(struct drbd_conf *mdev, enum drbd_conns side);
extern void resume_next_sg(struct drbd_conf *mdev);
extern void suspend_other_sg(struct drbd_conf *mdev);
......@@ -1409,7 +1404,7 @@ static inline void ov_oos_print(struct drbd_conf *mdev)
}
void drbd_csum(struct drbd_conf *, struct crypto_hash *, struct bio *, void *);
extern void drbd_csum(struct drbd_conf *, struct crypto_hash *, struct bio *, void *);
/* worker callbacks */
extern int w_req_cancel_conflict(struct drbd_conf *, struct drbd_work *, int);
extern int w_read_retry_remote(struct drbd_conf *, struct drbd_work *, int);
......@@ -1704,9 +1699,11 @@ static inline sector_t drbd_md_last_sector(struct drbd_backing_dev *bdev)
}
}
/* Returns the number of 512 byte sectors of the device */
static inline sector_t drbd_get_capacity(struct block_device *bdev)
{
return bdev ? get_capacity(bdev->bd_disk) : 0;
/* return bdev ? get_capacity(bdev->bd_disk) : 0; */
return bdev ? bdev->bd_inode->i_size >> 9 : 0;
}
/**
......
......@@ -32,11 +32,10 @@
#include <linux/slab.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/drbd_config.h>
#include <linux/drbd.h>
#include "drbd_int.h"
STATIC int drbd_proc_open(struct inode *inode, struct file *file);
static int drbd_proc_open(struct inode *inode, struct file *file);
struct proc_dir_entry *drbd_proc;
......@@ -55,7 +54,7 @@ struct file_operations drbd_proc_fops = {
* [=====>..............] 33.5% (23456/123456)
* finish: 2:20:20 speed: 6,345 (6,456) K/sec
*/
STATIC void drbd_syncer_progress(struct drbd_conf *mdev, struct seq_file *seq)
static void drbd_syncer_progress(struct drbd_conf *mdev, struct seq_file *seq)
{
unsigned long db, dt, dbdt, rt, rs_left;
unsigned int res;
......@@ -134,7 +133,7 @@ STATIC void drbd_syncer_progress(struct drbd_conf *mdev, struct seq_file *seq)
seq_printf(seq, " K/sec\n");
}
STATIC void resync_dump_detail(struct seq_file *seq, struct lc_element *e)
static void resync_dump_detail(struct seq_file *seq, struct lc_element *e)
{
struct bm_extent *bme = lc_entry(e, struct bm_extent, lce);
......@@ -144,7 +143,7 @@ STATIC void resync_dump_detail(struct seq_file *seq, struct lc_element *e)
);
}
STATIC int drbd_seq_show(struct seq_file *seq, void *v)
static int drbd_seq_show(struct seq_file *seq, void *v)
{
int i, hole = 0;
const char *sn;
......@@ -259,7 +258,7 @@ STATIC int drbd_seq_show(struct seq_file *seq, void *v)
return 0;
}
STATIC int drbd_proc_open(struct inode *inode, struct file *file)
static int drbd_proc_open(struct inode *inode, struct file *file)
{
return single_open(file, drbd_seq_show, PDE(inode)->data);
}
......
......@@ -121,8 +121,8 @@ static void _req_is_done(struct drbd_conf *mdev, struct drbd_request *req, const
list_empty(&req->w.list))) {
/* DEBUG ASSERT only; if this triggers, we
* probably corrupt the worker list here */
DUMPP(req->w.list.next);
DUMPP(req->w.list.prev);
dev_err(DEV, "req->w.list.next = %p\n", req->w.list.next);
dev_err(DEV, "req->w.list.prev = %p\n", req->w.list.prev);
}
req->w.cb = w_io_error;
drbd_queue_work(&mdev->data.work, &req->w);
......@@ -326,7 +326,7 @@ void _req_may_be_done(struct drbd_request *req, int error)
* second hlist_for_each_entry becomes a noop. This is even simpler than to
* grab a reference on the net_conf, and check for the two_primaries flag...
*/
STATIC int _req_conflicts(struct drbd_request *req)
static int _req_conflicts(struct drbd_request *req)
{
struct drbd_conf *mdev = req->mdev;
const sector_t sector = req->sector;
......@@ -689,7 +689,7 @@ void _req_mod(struct drbd_request *req, enum drbd_req_event what, int error)
* since size may be bigger than BM_BLOCK_SIZE,
* we may need to check several bits.
*/
STATIC int drbd_may_do_local_read(struct drbd_conf *mdev, sector_t sector, int size)
static int drbd_may_do_local_read(struct drbd_conf *mdev, sector_t sector, int size)
{
unsigned long sbnr, ebnr;
sector_t esector, nr_sectors;
......@@ -713,7 +713,7 @@ STATIC int drbd_may_do_local_read(struct drbd_conf *mdev, sector_t sector, int s
return 0 == drbd_bm_count_bits(mdev, sbnr, ebnr);
}
STATIC int drbd_make_request_common(struct drbd_conf *mdev, struct bio *bio)
static int drbd_make_request_common(struct drbd_conf *mdev, struct bio *bio)
{
const int rw = bio_rw(bio);
const int size = bio->bi_size;
......
......@@ -71,13 +71,13 @@ static const char *drbd_disk_s_names[] = {
static const char *drbd_state_sw_errors[] = {
[-SS_TWO_PRIMARIES] = "Multiple primaries not allowed by config",
[-SS_NO_UP_TO_DATE_DISK] = "Refusing to be Primary without at least one UpToDate disk",
[-SS_BOTH_INCONSISTENT] = "Refusing to be inconsistent on both nodes",
[-SS_SYNCING_DISKLESS] = "Refusing to be syncing and diskless",
[-SS_NO_LOCAL_DISK] = "Can not resync without local disk",
[-SS_NO_REMOTE_DISK] = "Can not resync without remote disk",
[-SS_CONNECTED_OUTDATES] = "Refusing to be Outdated while Connected",
[-SS_PRIMARY_NOP] = "Refusing to be Primary while peer is not outdated",
[-SS_RESYNC_RUNNING] = "Can not start OV/resync since it is already active",
[-SS_ALREADY_STANDALONE] = "Can not disconnect a StandAlone device",
[-SS_CW_FAILED_BY_PEER] = "State changed was refused by peer node",
[-SS_CW_FAILED_BY_PEER] = "State change was refused by peer node",
[-SS_IS_DISKLESS] = "Device is diskless, the requested operation requires a disk",
[-SS_DEVICE_IN_USE] = "Device is held open by someone",
[-SS_NO_NET_CONFIG] = "Have no net/connection configuration",
......
......@@ -71,7 +71,7 @@ enum dbg_print_flags {
};
/* Macro stuff */
STATIC char *nl_packet_name(int packet_type)
static char *nl_packet_name(int packet_type)
{
/* Generate packet type strings */
#define NL_PACKET(name, number, fields) \
......@@ -371,7 +371,7 @@ static void probe_drbd_resync(struct drbd_conf *mdev, int level, const char *fmt
static void probe_drbd_bio(struct drbd_conf *mdev, const char *pfx, struct bio *bio, int complete,
struct drbd_request *r)
{
#ifdef CONFIG_LBD
#if defined(CONFIG_LBDAF) || defined(CONFIG_LBD)
#define SECTOR_FORMAT "%Lx"
#else
#define SECTOR_FORMAT "%lx"
......@@ -387,7 +387,7 @@ static void probe_drbd_bio(struct drbd_conf *mdev, const char *pfx, struct bio *
const int rw = bio->bi_rw;
const int biorw = (rw & (RW_MASK|RWA_MASK));
const int biobarrier = (rw & (1<<BIO_RW_BARRIER));
const int biosync = (rw & ((1<<BIO_RW_UNPLUG) | (1<<BIO_RW_SYNCIO)));
if (!is_mdev_trace(mdev, TRACE_LVL_ALWAYS))
return;
......@@ -504,7 +504,7 @@ do { \
} \
} while (0)
STATIC char *dump_st(char *p, int len, union drbd_state mask, union drbd_state val)
static char *dump_st(char *p, int len, union drbd_state mask, union drbd_state val)
{
char *op = p;
*p = '\0';
......@@ -531,7 +531,7 @@ do { \
} \
} while (0)
STATIC char *_dump_block_id(u64 block_id, char *buff)
static char *_dump_block_id(u64 block_id, char *buff)
{
if (is_syncer_block_id(block_id))
strcpy(buff, "SyncerId");
......
......@@ -26,12 +26,11 @@
#include <linux/autoconf.h>
#include <linux/module.h>
#include <linux/version.h>
#include <linux/drbd.h>
#include <linux/sched.h>
#include <linux/smp_lock.h>
#include <linux/wait.h>
#include <linux/mm.h>
#include <linux/drbd_config.h>
#include <linux/memcontrol.h>
#include <linux/mm_inline.h>
#include <linux/slab.h>
......@@ -40,14 +39,13 @@
#include <linux/string.h>
#include <linux/scatterlist.h>
#include <linux/drbd.h>
#include "drbd_int.h"
#include "drbd_req.h"
#include "drbd_tracing.h"
#define SLEEP_TIME (HZ/10)
STATIC int w_make_ov_request(struct drbd_conf *mdev, struct drbd_work *w, int cancel);
static int w_make_ov_request(struct drbd_conf *mdev, struct drbd_work *w, int cancel);
......@@ -293,7 +291,7 @@ int w_resync_inactive(struct drbd_conf *mdev, struct drbd_work *w, int cancel)
return 1; /* Simply ignore this! */
}
STATIC void drbd_csum(struct drbd_conf *mdev, struct crypto_hash *tfm, struct bio *bio, void *digest)
void drbd_csum(struct drbd_conf *mdev, struct crypto_hash *tfm, struct bio *bio, void *digest)
{
struct hash_desc desc;
struct scatterlist sg;
......@@ -313,7 +311,7 @@ STATIC void drbd_csum(struct drbd_conf *mdev, struct crypto_hash *tfm, struct bi
crypto_hash_final(&desc, digest);
}
STATIC int w_e_send_csum(struct drbd_conf *mdev, struct drbd_work *w, int cancel)
static int w_e_send_csum(struct drbd_conf *mdev, struct drbd_work *w, int cancel)
{
struct drbd_epoch_entry *e = (struct drbd_epoch_entry *)w;
int digest_size;
......@@ -329,7 +327,7 @@ STATIC int w_e_send_csum(struct drbd_conf *mdev, struct drbd_work *w, int cancel
if (likely(drbd_bio_uptodate(e->private_bio))) {
digest_size = crypto_hash_digestsize(mdev->csums_tfm);
digest = kmalloc(digest_size, GFP_KERNEL);
digest = kmalloc(digest_size, GFP_NOIO);
if (digest) {
drbd_csum(mdev, mdev->csums_tfm, e->private_bio, digest);
......@@ -359,7 +357,7 @@ STATIC int w_e_send_csum(struct drbd_conf *mdev, struct drbd_work *w, int cancel
#define GFP_TRY (__GFP_HIGHMEM | __GFP_NOWARN)
STATIC int read_for_csum(struct drbd_conf *mdev, sector_t sector, int size)
static int read_for_csum(struct drbd_conf *mdev, sector_t sector, int size)
{
struct drbd_epoch_entry *e;
......@@ -421,9 +419,9 @@ int w_make_resync_request(struct drbd_conf *mdev,
unsigned long bit;
sector_t sector;
const sector_t capacity = drbd_get_capacity(mdev->this_bdev);
int max_segment_size = mdev->rq_queue->max_segment_size;
int number, i, size;
int align;
int max_segment_size = queue_max_segment_size(mdev->rq_queue);
int number, i, size, pe, mx;
int align, queued, sndbuf;
if (unlikely(cancel))
return 1;
......@@ -446,15 +444,40 @@ int w_make_resync_request(struct drbd_conf *mdev,
mdev->resync_work.cb = w_resync_inactive;
return 1;
}
/* All goto requeues have to happen after this block: get_ldev() */
number = SLEEP_TIME*mdev->sync_conf.rate / ((BM_BLOCK_SIZE/1024)*HZ);
number = SLEEP_TIME * mdev->sync_conf.rate / ((BM_BLOCK_SIZE/1024)*HZ);
pe = atomic_read(&mdev->rs_pending_cnt);
if (atomic_read(&mdev->rs_pending_cnt) > number)
goto requeue;
number -= atomic_read(&mdev->rs_pending_cnt);
mutex_lock(&mdev->data.mutex);
if (mdev->data.socket)
mx = mdev->data.socket->sk->sk_rcvbuf / sizeof(struct p_block_req);
else
mx = 1;
mutex_unlock(&mdev->data.mutex);
/* For resync rates >160MB/sec, allow more pending RS requests */
if (number > mx)
mx = number;
/* Limit the number of pending RS requests to no more than the peer's receive buffer */
if ((pe + number) > mx) {
number = mx - pe;
}
for (i = 0; i < number; i++) {
/* Stop generating RS requests, when half of the sendbuffer is filled */
mutex_lock(&mdev->data.mutex);
if (mdev->data.socket) {
queued = mdev->data.socket->sk->sk_wmem_queued;
sndbuf = mdev->data.socket->sk->sk_sndbuf;
} else {
queued = 1;
sndbuf = 0;
}
mutex_unlock(&mdev->data.mutex);
if (queued > sndbuf / 2)
goto requeue;
next_sector:
size = BM_BLOCK_SIZE;
bit = drbd_bm_find_next(mdev, mdev->bm_resync_fo);
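With made-up numbers, the pacing introduced above works out as follows (SLEEP_TIME is HZ/10 and BM_BLOCK_SIZE is 4 KiB, as in the source):

#include <stdio.h>

int main(void)
{
	const int HZ = 1000, SLEEP_TIME = HZ / 10;
	int rate = 10000;	/* sync_conf.rate in KiB/s (hypothetical) */
	int pe = 100, mx = 200;	/* pending requests, peer buffer capacity */

	/* 0.1 s * 10000 KiB/s / 4 KiB = 250 new requests per wakeup */
	int number = SLEEP_TIME * rate / ((4096 / 1024) * HZ);
	if (pe + number > mx)
		number = mx - pe;	/* clamp to what the peer can hold */
	printf("requests this cycle: %d\n", number);	/* 100 */
	return 0;
}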
......@@ -589,6 +612,11 @@ int w_make_ov_request(struct drbd_conf *mdev, struct drbd_work *w, int cancel)
sector = mdev->ov_position;
for (i = 0; i < number; i++) {
if (sector >= capacity) {
mdev->resync_work.cb = w_resync_inactive;
return 1;
}
size = BM_BLOCK_SIZE;
if (drbd_try_rs_begin_io(mdev, sector)) {
......@@ -605,11 +633,6 @@ int w_make_ov_request(struct drbd_conf *mdev, struct drbd_work *w, int cancel)
return 0;
}
sector += BM_SECT_PER_BIT;
if (sector >= capacity) {
mdev->resync_work.cb = w_resync_inactive;
return 1;
}
}
mdev->ov_position = sector;
......@@ -628,7 +651,7 @@ int w_ov_finished(struct drbd_conf *mdev, struct drbd_work *w, int cancel)
return 1;
}
STATIC int w_resync_finished(struct drbd_conf *mdev, struct drbd_work *w, int cancel)
static int w_resync_finished(struct drbd_conf *mdev, struct drbd_work *w, int cancel)
{
kfree(w);
......@@ -766,6 +789,7 @@ out:
mdev->rs_total = 0;
mdev->rs_failed = 0;
mdev->rs_paused = 0;
mdev->ov_start_sector = 0;
if (test_and_clear_bit(WRITE_BM_AFTER_RESYNC, &mdev->flags)) {
dev_warn(DEV, "Writing the whole bitmap, due to failed kmalloc\n");
......@@ -911,7 +935,7 @@ int w_e_end_csum_rs_req(struct drbd_conf *mdev, struct drbd_work *w, int cancel)
if (mdev->csums_tfm) {
digest_size = crypto_hash_digestsize(mdev->csums_tfm);
D_ASSERT(digest_size == di->digest_size);
digest = kmalloc(digest_size, GFP_KERNEL);
digest = kmalloc(digest_size, GFP_NOIO);
}
if (digest) {
drbd_csum(mdev, mdev->csums_tfm, e->private_bio, digest);
......@@ -967,13 +991,15 @@ int w_e_end_ov_req(struct drbd_conf *mdev, struct drbd_work *w, int cancel)
goto out;
digest_size = crypto_hash_digestsize(mdev->verify_tfm);
digest = kmalloc(digest_size, GFP_KERNEL);
/* FIXME if this allocation fails, online verify will not terminate! */
digest = kmalloc(digest_size, GFP_NOIO);
if (digest) {
drbd_csum(mdev, mdev->verify_tfm, e->private_bio, digest);
inc_rs_pending(mdev);
ok = drbd_send_drequest_csum(mdev, e->sector, e->size,
digest, digest_size, P_OV_REPLY);
if (ok)
inc_rs_pending(mdev);
if (!ok)
dec_rs_pending(mdev);
kfree(digest);
}
......@@ -1021,7 +1047,7 @@ int w_e_end_ov_reply(struct drbd_conf *mdev, struct drbd_work *w, int cancel)
if (likely(drbd_bio_uptodate(e->private_bio))) {
digest_size = crypto_hash_digestsize(mdev->verify_tfm);
digest = kmalloc(digest_size, GFP_KERNEL);
digest = kmalloc(digest_size, GFP_NOIO);
if (digest) {
drbd_csum(mdev, mdev->verify_tfm, e->private_bio, digest);
......@@ -1157,7 +1183,7 @@ int w_send_read_req(struct drbd_conf *mdev, struct drbd_work *w, int cancel)
return ok;
}
STATIC int _drbd_may_sync_now(struct drbd_conf *mdev)
static int _drbd_may_sync_now(struct drbd_conf *mdev)
{
struct drbd_conf *odev = mdev;
......@@ -1180,7 +1206,7 @@ STATIC int _drbd_may_sync_now(struct drbd_conf *mdev)
*
* Called from process context only (admin command and after_state_ch).
*/
STATIC int _drbd_pause_after(struct drbd_conf *mdev)
static int _drbd_pause_after(struct drbd_conf *mdev)
{
struct drbd_conf *odev;
int i, rv = 0;
......@@ -1205,7 +1231,7 @@ STATIC int _drbd_pause_after(struct drbd_conf *mdev)
*
* Called from process context only (admin command and worker).
*/
STATIC int _drbd_resume_next(struct drbd_conf *mdev)
static int _drbd_resume_next(struct drbd_conf *mdev)
{
struct drbd_conf *odev;
int i, rv = 0;
......@@ -1240,19 +1266,46 @@ void suspend_other_sg(struct drbd_conf *mdev)
write_unlock_irq(&global_state_lock);
}
void drbd_alter_sa(struct drbd_conf *mdev, int na)
static int sync_after_error(struct drbd_conf *mdev, int o_minor)
{
int changes;
struct drbd_conf *odev;
write_lock_irq(&global_state_lock);
mdev->sync_conf.after = na;
if (o_minor == -1)
return NO_ERROR;
if (o_minor < -1 || minor_to_mdev(o_minor) == NULL)
return ERR_SYNC_AFTER;
/* check for loops */
odev = minor_to_mdev(o_minor);
while (1) {
if (odev == mdev)
return ERR_SYNC_AFTER_CYCLE;
do {
changes = _drbd_pause_after(mdev);
changes |= _drbd_resume_next(mdev);
} while (changes);
/* dependency chain ends here, no cycles. */
if (odev->sync_conf.after == -1)
return NO_ERROR;
/* follow the dependency chain */
odev = minor_to_mdev(odev->sync_conf.after);
}
}
int drbd_alter_sa(struct drbd_conf *mdev, int na)
{
int changes;
int retcode;
write_lock_irq(&global_state_lock);
retcode = sync_after_error(mdev, na);
if (retcode == NO_ERROR) {
mdev->sync_conf.after = na;
do {
changes = _drbd_pause_after(mdev);
changes |= _drbd_resume_next(mdev);
} while (changes);
}
write_unlock_irq(&global_state_lock);
return retcode;
}
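The cycle check simply walks the sync-after chain until it either terminates or returns to the starting device. A standalone model (minors and the dependency array are hypothetical):

#include <stdio.h>

#define N 4
static int sync_after[N] = { 1, 2, -1, -1 };	/* 0 -> 1 -> 2 -> end */

static int would_cycle(int self, int proposed)
{
	int o = proposed;

	while (o != -1) {
		if (o == self)
			return 1;	/* ERR_SYNC_AFTER_CYCLE */
		o = sync_after[o];
	}
	return 0;			/* chain terminates: OK */
}

int main(void)
{
	printf("%d\n", would_cycle(3, 0));	/* 0: no cycle */
	sync_after[2] = 3;			/* 0 -> 1 -> 2 -> 3 */
	printf("%d\n", would_cycle(3, 0));	/* 1: reaches itself */
	return 0;
}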
/**
......@@ -1268,6 +1321,11 @@ void drbd_start_resync(struct drbd_conf *mdev, enum drbd_conns side)
union drbd_state ns;
int r;
if (mdev->state.conn >= C_SYNC_SOURCE) {
dev_err(DEV, "Resync already running!\n");
return;
}
trace_drbd_resync(mdev, TRACE_LVL_SUMMARY, "Resync starting: side=%s\n",
side == C_SYNC_TARGET ? "SyncTarget" : "SyncSource");
......
......@@ -7,11 +7,6 @@
/* see get_sb_bdev and bd_claim */
extern char *drbd_sec_holder;
static inline sector_t drbd_get_hardsect_size(struct block_device *bdev)
{
return bdev->bd_disk->queue->hardsect_size;
}
/* sets the number of 512 byte sectors of our virtual device */
static inline void drbd_set_my_capacity(struct drbd_conf *mdev,
sector_t size)
......
......@@ -26,7 +26,6 @@
#ifndef DRBD_H
#define DRBD_H
#include <linux/connector.h>
#include <asm/types.h>
#ifdef __KERNEL__
......@@ -53,6 +52,13 @@
#endif
extern const char *drbd_buildtag(void);
#define REL_VERSION "8.3.2rc2"
#define API_VERSION 88
#define PRO_VERSION_MIN 86
#define PRO_VERSION_MAX 90
enum drbd_io_error_p {
EP_PASS_ON, /* FIXME should this better be named "Ignore"? */
EP_CALL_HELPER,
......@@ -171,8 +177,8 @@ enum drbd_conns {
C_WF_CONNECTION,
C_WF_REPORT_PARAMS, /* we have a socket */
C_CONNECTED, /* we have introduced each other */
C_STARTING_SYNC_S, /* starting full sync by IOCTL. */
C_STARTING_SYNC_T, /* stariing full sync by IOCTL. */
C_STARTING_SYNC_S, /* starting full sync by admin request. */
C_STARTING_SYNC_T, /* starting full sync by admin request. */
C_WF_BITMAP_S,
C_WF_BITMAP_T,
C_WF_SYNC_UUID,
......@@ -249,8 +255,8 @@ enum drbd_state_ret_codes {
SS_UNKNOWN_ERROR = 0, /* Used to sleep longer in _drbd_request_state */
SS_TWO_PRIMARIES = -1,
SS_NO_UP_TO_DATE_DISK = -2,
SS_BOTH_INCONSISTENT = -4,
SS_SYNCING_DISKLESS = -5,
SS_NO_LOCAL_DISK = -4,
SS_NO_REMOTE_DISK = -5,
SS_CONNECTED_OUTDATES = -6,
SS_PRIMARY_NOP = -7,
SS_RESYNC_RUNNING = -8,
......
/*
drbd_config.h
DRBD's compile time configuration.
drbd is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2, or (at your option)
any later version.
drbd is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with drbd; see the file COPYING. If not, write to
the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
*/
#ifndef DRBD_CONFIG_H
#define DRBD_CONFIG_H
extern const char *drbd_buildtag(void);
#define REL_VERSION "8.3.1"
#define API_VERSION 88
#define PRO_VERSION_MIN 86
#define PRO_VERSION_MAX 90
#ifndef __CHECKER__ /* for a sparse run, we need all STATICs */
#define DBG_ALL_SYMBOLS /* no static functs, improves quality of OOPS traces */
#endif
/* Enable fault insertion code */
#define DRBD_ENABLE_FAULTS
#endif
......@@ -72,6 +72,10 @@
#define DRBD_SNDBUF_SIZE_MAX (10<<20)
#define DRBD_SNDBUF_SIZE_DEF (2*65535)
#define DRBD_RCVBUF_SIZE_MIN 0
#define DRBD_RCVBUF_SIZE_MAX (10<<20)
#define DRBD_RCVBUF_SIZE_DEF (2*65535)
/* @4k PageSize -> 128kB - 512MB */
#define DRBD_MAX_BUFFERS_MIN 32
#define DRBD_MAX_BUFFERS_MAX 131072
......
......@@ -55,6 +55,7 @@ NL_PACKET(net_conf, 5,
NL_INTEGER( 26, T_MAY_IGNORE, after_sb_2p)
NL_INTEGER( 39, T_MAY_IGNORE, rr_conflict)
NL_INTEGER( 40, T_MAY_IGNORE, ping_timeo)
NL_INTEGER( 67, T_MAY_IGNORE, rcvbuf_size)
/* 59 addr_family was available in GIT, never released */
NL_BIT( 60, T_MANDATORY, mind_af)
NL_BIT( 27, T_MAY_IGNORE, want_lose)
......@@ -77,7 +78,7 @@ NL_PACKET(syncer_conf, 8,
NL_STRING( 52, T_MAY_IGNORE, verify_alg, SHARED_SECRET_MAX)
NL_STRING( 51, T_MAY_IGNORE, cpu_mask, 32)
NL_STRING( 64, T_MAY_IGNORE, csums_alg, SHARED_SECRET_MAX)
NL_BIT( 65, T_MAY_IGNORE, use_rle_encoding)
NL_BIT( 65, T_MAY_IGNORE, use_rle)
)
NL_PACKET(invalidate, 9, )
......@@ -121,6 +122,7 @@ NL_PACKET(dump_ee, 24,
)
NL_PACKET(start_ov, 25,
NL_INT64( 66, T_MAY_IGNORE, start_sector)
)
NL_PACKET(new_c_uuid, 26,
......
......@@ -27,6 +27,10 @@
#define LRU_CACHE_H
#include <linux/list.h>
#include <linux/slab.h>
#include <linux/bitops.h>
#include <linux/string.h> /* for memset */
#include <linux/seq_file.h>
/*
This header file (and its .c file; kernel-doc of functions see there)
......@@ -142,22 +146,29 @@ write intent log information, three of which are mentioned here.
* an element is said to be "in the active set",
* if either on "in_use" or "lru", i.e. lc_number != LC_FREE.
*
* DRBD currently only uses 61 elements on the resync lru_cache (total memory
* usage 2 pages), and up to 3833 elements on the act_log lru_cache, totalling
* ~215 kB for 64bit architechture, ~53 pages.
* DRBD currently (May 2009) only uses 61 elements on the resync lru_cache
* (total memory usage 2 pages), and up to 3833 elements on the act_log
* lru_cache, totalling ~215 kB for 64bit architecture, ~53 pages.
*
* We usually do not actually free these objects again, but only "recycle"
* them, as the change "index: -old_label, +LC_FREE" would need a transaction
* as well. Which also means that using a kmem_cache or even mempool to
* allocate the objects from wastes some resources. But it would avoid high
* order page allocations in kmalloc, so we may change to a kmem_cache backed
* allocation of the elements in the near future.
* as well. Which also means that using a kmem_cache to allocate the objects
* from wastes some resources.
* But it avoids high order page allocations in kmalloc.
*/
struct lc_element {
struct hlist_node colision;
struct list_head list; /* LRU list or free list */
unsigned int refcnt;
unsigned int lc_number;
unsigned refcnt;
/* back "pointer" into ts_cache->element[index],
* for paranoia, and for "ts_element_to_index" */
unsigned lc_index;
/* if we want to track a larger set of objects,
* it needs to become arch independent u64 */
unsigned lc_number;
/* special label when on free list */
#define LC_FREE (~0U)
};
struct lru_cache {
......@@ -166,16 +177,25 @@ struct lru_cache {
struct list_head free;
struct list_head in_use;
/* size of tracked objects */
/* the pre-created kmem cache to allocate the objects from */
struct kmem_cache *lc_cache;
/* size of tracked objects, used to memset(,0,) them in lc_reset */
size_t element_size;
/* offset of struct lc_element member in the tracked object */
size_t element_off;
/* number of elements (indices) */
unsigned int nr_elements;
/* Arbitrary limit on maximum tracked objects. Practical limit is much
* lower due to allocation failures, probably. For typical use cases,
* nr_elements should be a few thousand at most.
* This also limits the maximum value of ts_element.ts_index, allowing the
* 8 high bits of .ts_index to be overloaded with flags in the future. */
#define LC_MAX_ACTIVE (1<<24)
/* statistics */
unsigned int used;
unsigned used; /* number of elements currently on in_use list */
unsigned long hits, misses, starving, dirty, changed;
/* see below: flag-bits for lru_cache */
......@@ -190,8 +210,9 @@ struct lru_cache {
void *lc_private;
const char *name;
struct hlist_head slot[0];
/* hash colision chains here, then element storage. */
/* nr_elements there */
struct hlist_head *lc_slot;
struct lc_element **lc_element;
};
......@@ -217,8 +238,8 @@ enum {
#define LC_DIRTY (1<<__LC_DIRTY)
#define LC_STARVING (1<<__LC_STARVING)
extern struct lru_cache *lc_create(const char *name, unsigned int e_count,
size_t e_size, size_t e_off);
extern struct lru_cache *lc_create(const char *name, struct kmem_cache *cache,
unsigned e_count, size_t e_size, size_t e_off);
extern void lc_reset(struct lru_cache *lc);
extern void lc_destroy(struct lru_cache *lc);
extern void lc_set(struct lru_cache *lc, unsigned int enr, int index);
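A hedged sketch of a caller using the new lc_create() signature, where the kmem_cache for the tracked objects is now supplied by the caller; the struct and names below are illustrative, not taken from DRBD:

#include <linux/lru_cache.h>
#include <linux/slab.h>
#include <linux/stddef.h>

struct my_extent {
	struct lc_element lce;	/* embedded; recovered via lc_entry() */
	unsigned long flags;
};

static struct lru_cache *make_my_cache(struct kmem_cache **slab_out)
{
	struct kmem_cache *slab;

	slab = kmem_cache_create("my_extent", sizeof(struct my_extent),
				 0, 0, NULL);
	if (!slab)
		return NULL;
	*slab_out = slab;
	return lc_create("my-lru", slab, 64, sizeof(struct my_extent),
			 offsetof(struct my_extent, lce));
}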
......@@ -236,15 +257,22 @@ extern size_t lc_seq_printf_stats(struct seq_file *seq, struct lru_cache *lc);
extern void lc_seq_dump_details(struct seq_file *seq, struct lru_cache *lc, char *utext,
void (*detail) (struct seq_file *, struct lc_element *));
/* This can be used to stop lc_get from changing the set of active elements.
* Note that the reference counts and order on the lru list may still change.
* returns true if we aquired the lock.
/**
* lc_try_lock - can be used to stop lc_get() from changing the tracked set
* @lc: the lru cache to operate on
*
* Note that the reference counts and order on the active and lru lists may
* still change. Returns true if we acquired the lock.
*/
static inline int lc_try_lock(struct lru_cache *lc)
{
return !test_and_set_bit(__LC_DIRTY, &lc->flags);
}
/**
* lc_unlock - unlock @lc, allow lc_get() to change the set again
* @lc: the lru cache to operate on
*/
static inline void lc_unlock(struct lru_cache *lc)
{
clear_bit(__LC_DIRTY, &lc->flags);
......@@ -257,29 +285,10 @@ static inline int lc_is_used(struct lru_cache *lc, unsigned int enr)
return e && e->refcnt;
}
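Typical use of the pair, as a sketch: freeze the tracked set, work on a stable mapping, then allow changes again.

#include <linux/lru_cache.h>

static void with_stable_set(struct lru_cache *lc,
			    void (*work)(struct lru_cache *))
{
	if (!lc_try_lock(lc))
		return;		/* set is busy; caller may retry later */
	work(lc);
	lc_unlock(lc);
}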
#define LC_FREE (-1U)
#define lc_entry(ptr, type, member) \
container_of(ptr, type, member)
static inline struct lc_element *
lc_element_by_index(struct lru_cache *lc, unsigned int i)
{
BUG_ON(i >= lc->nr_elements);
return (struct lc_element *)(
((char *)(lc->slot + lc->nr_elements)) +
i * lc->element_size
+ lc->element_off);
}
static inline size_t lc_index_of(struct lru_cache *lc, struct lc_element *e)
{
size_t i = ((char *)(e) - lc->element_off
- ((char *)(lc->slot + lc->nr_elements)))
/ lc->element_size;
BUG_ON(i >= lc->nr_elements);
BUG_ON(e != lc_element_by_index(lc, i));
return i;
}
extern struct lc_element *lc_element_by_index(struct lru_cache *lc, unsigned i);
extern unsigned int lc_index_of(struct lru_cache *lc, struct lc_element *e);
#endif