drbd_int.h - drivers/block/drbd/drbd_int.h - Linux diff v3.1

   1/*
   2  drbd_int.h
   3
   4  This file is part of DRBD by Philipp Reisner and Lars Ellenberg.
   5
   6  Copyright (C) 2001-2008, LINBIT Information Technologies GmbH.
   7  Copyright (C) 1999-2008, Philipp Reisner <philipp.reisner@linbit.com>.
   8  Copyright (C) 2002-2008, Lars Ellenberg <lars.ellenberg@linbit.com>.
   9
  10  drbd is free software; you can redistribute it and/or modify
  11  it under the terms of the GNU General Public License as published by
  12  the Free Software Foundation; either version 2, or (at your option)
  13  any later version.
  14
  15  drbd is distributed in the hope that it will be useful,
  16  but WITHOUT ANY WARRANTY; without even the implied warranty of
  17  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.	See the
  18  GNU General Public License for more details.
  19
  20  You should have received a copy of the GNU General Public License
  21  along with drbd; see the file COPYING.  If not, write to
  22  the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
  23
  24*/
  25
  26#ifndef _DRBD_INT_H
  27#define _DRBD_INT_H
  28
 
  29#include <linux/compiler.h>
  30#include <linux/types.h>
  31#include <linux/version.h>
  32#include <linux/list.h>
  33#include <linux/sched.h>
  34#include <linux/bitops.h>
  35#include <linux/slab.h>
  36#include <linux/crypto.h>
  37#include <linux/ratelimit.h>
  38#include <linux/tcp.h>
  39#include <linux/mutex.h>
  40#include <linux/major.h>
  41#include <linux/blkdev.h>
 
  42#include <linux/genhd.h>
 
 
  43#include <net/tcp.h>
  44#include <linux/lru_cache.h>
  45#include <linux/prefetch.h>
 
 
 
 
 
  46
  47#ifdef __CHECKER__
  48# define __protected_by(x)       __attribute__((require_context(x,1,999,"rdwr")))
  49# define __protected_read_by(x)  __attribute__((require_context(x,1,999,"read")))
  50# define __protected_write_by(x) __attribute__((require_context(x,1,999,"write")))
  51# define __must_hold(x)       __attribute__((context(x,1,1), require_context(x,1,999,"call")))
  52#else
  53# define __protected_by(x)
  54# define __protected_read_by(x)
  55# define __protected_write_by(x)
  56# define __must_hold(x)
  57#endif
  58
  59#define __no_warn(lock, stmt) do { __acquire(lock); stmt; __release(lock); } while (0)
  60
  61/* module parameter, defined in drbd_main.c */
  62extern unsigned int minor_count;
  63extern int disable_sendpage;
  64extern int allow_oos;
  65extern unsigned int cn_idx;
  66
  67#ifdef CONFIG_DRBD_FAULT_INJECTION
  68extern int enable_faults;
  69extern int fault_rate;
  70extern int fault_devs;
  71#endif
  72
  73extern char usermode_helper[];
  74
 
  75
  76/* I don't remember why XCPU ...
  77 * This is used to wake the asender,
  78 * and to interrupt sending the sending task
  79 * on disconnect.
  80 */
  81#define DRBD_SIG SIGXCPU
  82
  83/* This is used to stop/restart our threads.
  84 * Cannot use SIGTERM nor SIGKILL, since these
  85 * are sent out by init on runlevel changes
  86 * I choose SIGHUP for now.
  87 */
  88#define DRBD_SIGKILL SIGHUP
  89
  90/* All EEs on the free list should have ID_VACANT (== 0)
  91 * freshly allocated EEs get !ID_VACANT (== 1)
  92 * so if it says "cannot dereference null pointer at address 0x00000001",
  93 * it is most likely one of these :( */
  94
  95#define ID_IN_SYNC      (4711ULL)
  96#define ID_OUT_OF_SYNC  (4712ULL)
  97
  98#define ID_SYNCER (-1ULL)
  99#define ID_VACANT 0
 100#define is_syncer_block_id(id) ((id) == ID_SYNCER)
 101#define UUID_NEW_BM_OFFSET ((u64)0x0001000000000000ULL)
 102
 103struct drbd_conf;
 104
 
 105
 106/* to shorten dev_warn(DEV, "msg"); and relatives statements */
 107#define DEV (disk_to_dev(mdev->vdisk))
 108
 109#define D_ASSERT(exp)	if (!(exp)) \
 110	 dev_err(DEV, "ASSERT( " #exp " ) in %s:%d\n", __FILE__, __LINE__)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 111
 112#define ERR_IF(exp) if (({						\
 113	int _b = (exp) != 0;						\
 114	if (_b) dev_err(DEV, "ASSERT FAILED: %s: (%s) in %s:%d\n",	\
 115			__func__, #exp, __FILE__, __LINE__);		\
 116	_b;								\
 117	}))
 
 
 
 
 
 
 118
 119/* Defines to control fault insertion */
 120enum {
 121	DRBD_FAULT_MD_WR = 0,	/* meta data write */
 122	DRBD_FAULT_MD_RD = 1,	/*           read  */
 123	DRBD_FAULT_RS_WR = 2,	/* resync          */
 124	DRBD_FAULT_RS_RD = 3,
 125	DRBD_FAULT_DT_WR = 4,	/* data            */
 126	DRBD_FAULT_DT_RD = 5,
 127	DRBD_FAULT_DT_RA = 6,	/* data read ahead */
 128	DRBD_FAULT_BM_ALLOC = 7,	/* bitmap allocation */
 129	DRBD_FAULT_AL_EE = 8,	/* alloc ee */
 130	DRBD_FAULT_RECEIVE = 9, /* Changes some bytes upon receiving a [rs]data block */
 131
 132	DRBD_FAULT_MAX,
 133};
 134
 135extern unsigned int
 136_drbd_insert_fault(struct drbd_conf *mdev, unsigned int type);
 137
 138static inline int
 139drbd_insert_fault(struct drbd_conf *mdev, unsigned int type) {
 140#ifdef CONFIG_DRBD_FAULT_INJECTION
 141	return fault_rate &&
 142		(enable_faults & (1<<type)) &&
 143		_drbd_insert_fault(mdev, type);
 144#else
 145	return 0;
 146#endif
 147}
 148
 149/* integer division, round _UP_ to the next integer */
 150#define div_ceil(A, B) ((A)/(B) + ((A)%(B) ? 1 : 0))
 151/* usual integer division */
 152#define div_floor(A, B) ((A)/(B))
 153
 154/* drbd_meta-data.c (still in drbd_main.c) */
 155/* 4th incarnation of the disk layout. */
 156#define DRBD_MD_MAGIC (DRBD_MAGIC+4)
 157
 158extern struct drbd_conf **minor_table;
 159extern struct ratelimit_state drbd_ratelimit_state;
 
 
 160
 161/* on the wire */
 162enum drbd_packets {
 163	/* receiver (data socket) */
 164	P_DATA		      = 0x00,
 165	P_DATA_REPLY	      = 0x01, /* Response to P_DATA_REQUEST */
 166	P_RS_DATA_REPLY	      = 0x02, /* Response to P_RS_DATA_REQUEST */
 167	P_BARRIER	      = 0x03,
 168	P_BITMAP	      = 0x04,
 169	P_BECOME_SYNC_TARGET  = 0x05,
 170	P_BECOME_SYNC_SOURCE  = 0x06,
 171	P_UNPLUG_REMOTE	      = 0x07, /* Used at various times to hint the peer */
 172	P_DATA_REQUEST	      = 0x08, /* Used to ask for a data block */
 173	P_RS_DATA_REQUEST     = 0x09, /* Used to ask for a data block for resync */
 174	P_SYNC_PARAM	      = 0x0a,
 175	P_PROTOCOL	      = 0x0b,
 176	P_UUIDS		      = 0x0c,
 177	P_SIZES		      = 0x0d,
 178	P_STATE		      = 0x0e,
 179	P_SYNC_UUID	      = 0x0f,
 180	P_AUTH_CHALLENGE      = 0x10,
 181	P_AUTH_RESPONSE	      = 0x11,
 182	P_STATE_CHG_REQ	      = 0x12,
 183
 184	/* asender (meta socket */
 185	P_PING		      = 0x13,
 186	P_PING_ACK	      = 0x14,
 187	P_RECV_ACK	      = 0x15, /* Used in protocol B */
 188	P_WRITE_ACK	      = 0x16, /* Used in protocol C */
 189	P_RS_WRITE_ACK	      = 0x17, /* Is a P_WRITE_ACK, additionally call set_in_sync(). */
 190	P_DISCARD_ACK	      = 0x18, /* Used in proto C, two-primaries conflict detection */
 191	P_NEG_ACK	      = 0x19, /* Sent if local disk is unusable */
 192	P_NEG_DREPLY	      = 0x1a, /* Local disk is broken... */
 193	P_NEG_RS_DREPLY	      = 0x1b, /* Local disk is broken... */
 194	P_BARRIER_ACK	      = 0x1c,
 195	P_STATE_CHG_REPLY     = 0x1d,
 196
 197	/* "new" commands, no longer fitting into the ordering scheme above */
 198
 199	P_OV_REQUEST	      = 0x1e, /* data socket */
 200	P_OV_REPLY	      = 0x1f,
 201	P_OV_RESULT	      = 0x20, /* meta socket */
 202	P_CSUM_RS_REQUEST     = 0x21, /* data socket */
 203	P_RS_IS_IN_SYNC	      = 0x22, /* meta socket */
 204	P_SYNC_PARAM89	      = 0x23, /* data socket, protocol version 89 replacement for P_SYNC_PARAM */
 205	P_COMPRESSED_BITMAP   = 0x24, /* compressed or otherwise encoded bitmap transfer */
 206	/* P_CKPT_FENCE_REQ      = 0x25, * currently reserved for protocol D */
 207	/* P_CKPT_DISABLE_REQ    = 0x26, * currently reserved for protocol D */
 208	P_DELAY_PROBE         = 0x27, /* is used on BOTH sockets */
 209	P_OUT_OF_SYNC         = 0x28, /* Mark as out of sync (Outrunning), data socket */
 210	P_RS_CANCEL           = 0x29, /* meta: Used to cancel RS_DATA_REQUEST packet by SyncSource */
 211
 212	P_MAX_CMD	      = 0x2A,
 213	P_MAY_IGNORE	      = 0x100, /* Flag to test if (cmd > P_MAY_IGNORE) ... */
 214	P_MAX_OPT_CMD	      = 0x101,
 215
 216	/* special command ids for handshake */
 217
 218	P_HAND_SHAKE_M	      = 0xfff1, /* First Packet on the MetaSock */
 219	P_HAND_SHAKE_S	      = 0xfff2, /* First Packet on the Socket */
 220
 221	P_HAND_SHAKE	      = 0xfffe	/* FIXED for the next century! */
 222};
 223
 224static inline const char *cmdname(enum drbd_packets cmd)
 225{
 226	/* THINK may need to become several global tables
 227	 * when we want to support more than
 228	 * one PRO_VERSION */
 229	static const char *cmdnames[] = {
 230		[P_DATA]	        = "Data",
 231		[P_DATA_REPLY]	        = "DataReply",
 232		[P_RS_DATA_REPLY]	= "RSDataReply",
 233		[P_BARRIER]	        = "Barrier",
 234		[P_BITMAP]	        = "ReportBitMap",
 235		[P_BECOME_SYNC_TARGET]  = "BecomeSyncTarget",
 236		[P_BECOME_SYNC_SOURCE]  = "BecomeSyncSource",
 237		[P_UNPLUG_REMOTE]	= "UnplugRemote",
 238		[P_DATA_REQUEST]	= "DataRequest",
 239		[P_RS_DATA_REQUEST]     = "RSDataRequest",
 240		[P_SYNC_PARAM]	        = "SyncParam",
 241		[P_SYNC_PARAM89]	= "SyncParam89",
 242		[P_PROTOCOL]            = "ReportProtocol",
 243		[P_UUIDS]	        = "ReportUUIDs",
 244		[P_SIZES]	        = "ReportSizes",
 245		[P_STATE]	        = "ReportState",
 246		[P_SYNC_UUID]           = "ReportSyncUUID",
 247		[P_AUTH_CHALLENGE]      = "AuthChallenge",
 248		[P_AUTH_RESPONSE]	= "AuthResponse",
 249		[P_PING]		= "Ping",
 250		[P_PING_ACK]	        = "PingAck",
 251		[P_RECV_ACK]	        = "RecvAck",
 252		[P_WRITE_ACK]	        = "WriteAck",
 253		[P_RS_WRITE_ACK]	= "RSWriteAck",
 254		[P_DISCARD_ACK]	        = "DiscardAck",
 255		[P_NEG_ACK]	        = "NegAck",
 256		[P_NEG_DREPLY]	        = "NegDReply",
 257		[P_NEG_RS_DREPLY]	= "NegRSDReply",
 258		[P_BARRIER_ACK]	        = "BarrierAck",
 259		[P_STATE_CHG_REQ]       = "StateChgRequest",
 260		[P_STATE_CHG_REPLY]     = "StateChgReply",
 261		[P_OV_REQUEST]          = "OVRequest",
 262		[P_OV_REPLY]            = "OVReply",
 263		[P_OV_RESULT]           = "OVResult",
 264		[P_CSUM_RS_REQUEST]     = "CsumRSRequest",
 265		[P_RS_IS_IN_SYNC]	= "CsumRSIsInSync",
 266		[P_COMPRESSED_BITMAP]   = "CBitmap",
 267		[P_DELAY_PROBE]         = "DelayProbe",
 268		[P_OUT_OF_SYNC]		= "OutOfSync",
 269		[P_MAX_CMD]	        = NULL,
 270	};
 271
 272	if (cmd == P_HAND_SHAKE_M)
 273		return "HandShakeM";
 274	if (cmd == P_HAND_SHAKE_S)
 275		return "HandShakeS";
 276	if (cmd == P_HAND_SHAKE)
 277		return "HandShake";
 278	if (cmd >= P_MAX_CMD)
 279		return "Unknown";
 280	return cmdnames[cmd];
 281}
 282
 283/* for sending/receiving the bitmap,
 284 * possibly in some encoding scheme */
 285struct bm_xfer_ctx {
 286	/* "const"
 287	 * stores total bits and long words
 288	 * of the bitmap, so we don't need to
 289	 * call the accessor functions over and again. */
 290	unsigned long bm_bits;
 291	unsigned long bm_words;
 292	/* during xfer, current position within the bitmap */
 293	unsigned long bit_offset;
 294	unsigned long word_offset;
 295
 296	/* statistics; index: (h->command == P_BITMAP) */
 297	unsigned packets[2];
 298	unsigned bytes[2];
 299};
 300
 301extern void INFO_bm_xfer_stats(struct drbd_conf *mdev,
 302		const char *direction, struct bm_xfer_ctx *c);
 303
 304static inline void bm_xfer_ctx_bit_to_word_offset(struct bm_xfer_ctx *c)
 305{
 306	/* word_offset counts "native long words" (32 or 64 bit),
 307	 * aligned at 64 bit.
 308	 * Encoded packet may end at an unaligned bit offset.
 309	 * In case a fallback clear text packet is transmitted in
 310	 * between, we adjust this offset back to the last 64bit
 311	 * aligned "native long word", which makes coding and decoding
 312	 * the plain text bitmap much more convenient.  */
 313#if BITS_PER_LONG == 64
 314	c->word_offset = c->bit_offset >> 6;
 315#elif BITS_PER_LONG == 32
 316	c->word_offset = c->bit_offset >> 5;
 317	c->word_offset &= ~(1UL);
 318#else
 319# error "unsupported BITS_PER_LONG"
 320#endif
 321}
 322
 323#ifndef __packed
 324#define __packed __attribute__((packed))
 325#endif
 326
 327/* This is the layout for a packet on the wire.
 328 * The byteorder is the network byte order.
 329 *     (except block_id and barrier fields.
 330 *	these are pointers to local structs
 331 *	and have no relevance for the partner,
 332 *	which just echoes them as received.)
 333 *
 334 * NOTE that the payload starts at a long aligned offset,
 335 * regardless of 32 or 64 bit arch!
 336 */
 337struct p_header80 {
 338	u32	  magic;
 339	u16	  command;
 340	u16	  length;	/* bytes of data after this header */
 341	u8	  payload[0];
 342} __packed;
 343
 344/* Header for big packets, Used for data packets exceeding 64kB */
 345struct p_header95 {
 346	u16	  magic;	/* use DRBD_MAGIC_BIG here */
 347	u16	  command;
 348	u32	  length;	/* Use only 24 bits of that. Ignore the highest 8 bit. */
 349	u8	  payload[0];
 350} __packed;
 351
 352union p_header {
 353	struct p_header80 h80;
 354	struct p_header95 h95;
 355};
 356
 357/*
 358 * short commands, packets without payload, plain p_header:
 359 *   P_PING
 360 *   P_PING_ACK
 361 *   P_BECOME_SYNC_TARGET
 362 *   P_BECOME_SYNC_SOURCE
 363 *   P_UNPLUG_REMOTE
 364 */
 365
 366/*
 367 * commands with out-of-struct payload:
 368 *   P_BITMAP    (no additional fields)
 369 *   P_DATA, P_DATA_REPLY (see p_data)
 370 *   P_COMPRESSED_BITMAP (see receive_compressed_bitmap)
 371 */
 372
 373/* these defines must not be changed without changing the protocol version */
 374#define DP_HARDBARRIER	      1 /* depricated */
 375#define DP_RW_SYNC	      2 /* equals REQ_SYNC    */
 376#define DP_MAY_SET_IN_SYNC    4
 377#define DP_UNPLUG             8 /* not used anymore   */
 378#define DP_FUA               16 /* equals REQ_FUA     */
 379#define DP_FLUSH             32 /* equals REQ_FLUSH   */
 380#define DP_DISCARD           64 /* equals REQ_DISCARD */
 381
 382struct p_data {
 383	union p_header head;
 384	u64	    sector;    /* 64 bits sector number */
 385	u64	    block_id;  /* to identify the request in protocol B&C */
 386	u32	    seq_num;
 387	u32	    dp_flags;
 388} __packed;
 389
 390/*
 391 * commands which share a struct:
 392 *  p_block_ack:
 393 *   P_RECV_ACK (proto B), P_WRITE_ACK (proto C),
 394 *   P_DISCARD_ACK (proto C, two-primaries conflict detection)
 395 *  p_block_req:
 396 *   P_DATA_REQUEST, P_RS_DATA_REQUEST
 397 */
 398struct p_block_ack {
 399	struct p_header80 head;
 400	u64	    sector;
 401	u64	    block_id;
 402	u32	    blksize;
 403	u32	    seq_num;
 404} __packed;
 405
 406
 407struct p_block_req {
 408	struct p_header80 head;
 409	u64 sector;
 410	u64 block_id;
 411	u32 blksize;
 412	u32 pad;	/* to multiple of 8 Byte */
 413} __packed;
 414
 415/*
 416 * commands with their own struct for additional fields:
 417 *   P_HAND_SHAKE
 418 *   P_BARRIER
 419 *   P_BARRIER_ACK
 420 *   P_SYNC_PARAM
 421 *   ReportParams
 422 */
 423
 424struct p_handshake {
 425	struct p_header80 head;	/* 8 bytes */
 426	u32 protocol_min;
 427	u32 feature_flags;
 428	u32 protocol_max;
 429
 430	/* should be more than enough for future enhancements
 431	 * for now, feature_flags and the reserverd array shall be zero.
 432	 */
 433
 434	u32 _pad;
 435	u64 reserverd[7];
 436} __packed;
 437/* 80 bytes, FIXED for the next century */
 438
 439struct p_barrier {
 440	struct p_header80 head;
 441	u32 barrier;	/* barrier number _handle_ only */
 442	u32 pad;	/* to multiple of 8 Byte */
 443} __packed;
 444
 445struct p_barrier_ack {
 446	struct p_header80 head;
 447	u32 barrier;
 448	u32 set_size;
 449} __packed;
 450
 451struct p_rs_param {
 452	struct p_header80 head;
 453	u32 rate;
 454
 455	      /* Since protocol version 88 and higher. */
 456	char verify_alg[0];
 457} __packed;
 458
 459struct p_rs_param_89 {
 460	struct p_header80 head;
 461	u32 rate;
 462        /* protocol version 89: */
 463	char verify_alg[SHARED_SECRET_MAX];
 464	char csums_alg[SHARED_SECRET_MAX];
 465} __packed;
 466
 467struct p_rs_param_95 {
 468	struct p_header80 head;
 469	u32 rate;
 470	char verify_alg[SHARED_SECRET_MAX];
 471	char csums_alg[SHARED_SECRET_MAX];
 472	u32 c_plan_ahead;
 473	u32 c_delay_target;
 474	u32 c_fill_target;
 475	u32 c_max_rate;
 476} __packed;
 477
 478enum drbd_conn_flags {
 479	CF_WANT_LOSE = 1,
 480	CF_DRY_RUN = 2,
 481};
 482
 483struct p_protocol {
 484	struct p_header80 head;
 485	u32 protocol;
 486	u32 after_sb_0p;
 487	u32 after_sb_1p;
 488	u32 after_sb_2p;
 489	u32 conn_flags;
 490	u32 two_primaries;
 491
 492              /* Since protocol version 87 and higher. */
 493	char integrity_alg[0];
 494
 495} __packed;
 496
 497struct p_uuids {
 498	struct p_header80 head;
 499	u64 uuid[UI_EXTENDED_SIZE];
 500} __packed;
 501
 502struct p_rs_uuid {
 503	struct p_header80 head;
 504	u64	    uuid;
 505} __packed;
 506
 507struct p_sizes {
 508	struct p_header80 head;
 509	u64	    d_size;  /* size of disk */
 510	u64	    u_size;  /* user requested size */
 511	u64	    c_size;  /* current exported size */
 512	u32	    max_bio_size;  /* Maximal size of a BIO */
 513	u16	    queue_order_type;  /* not yet implemented in DRBD*/
 514	u16	    dds_flags; /* use enum dds_flags here. */
 515} __packed;
 516
 517struct p_state {
 518	struct p_header80 head;
 519	u32	    state;
 520} __packed;
 521
 522struct p_req_state {
 523	struct p_header80 head;
 524	u32	    mask;
 525	u32	    val;
 526} __packed;
 527
 528struct p_req_state_reply {
 529	struct p_header80 head;
 530	u32	    retcode;
 531} __packed;
 532
 533struct p_drbd06_param {
 534	u64	  size;
 535	u32	  state;
 536	u32	  blksize;
 537	u32	  protocol;
 538	u32	  version;
 539	u32	  gen_cnt[5];
 540	u32	  bit_map_gen[5];
 541} __packed;
 542
 543struct p_discard {
 544	struct p_header80 head;
 545	u64	    block_id;
 546	u32	    seq_num;
 547	u32	    pad;
 548} __packed;
 549
 550struct p_block_desc {
 551	struct p_header80 head;
 552	u64 sector;
 553	u32 blksize;
 554	u32 pad;	/* to multiple of 8 Byte */
 555} __packed;
 556
 557/* Valid values for the encoding field.
 558 * Bump proto version when changing this. */
 559enum drbd_bitmap_code {
 560	/* RLE_VLI_Bytes = 0,
 561	 * and other bit variants had been defined during
 562	 * algorithm evaluation. */
 563	RLE_VLI_Bits = 2,
 564};
 565
 566struct p_compressed_bm {
 567	struct p_header80 head;
 568	/* (encoding & 0x0f): actual encoding, see enum drbd_bitmap_code
 569	 * (encoding & 0x80): polarity (set/unset) of first runlength
 570	 * ((encoding >> 4) & 0x07): pad_bits, number of trailing zero bits
 571	 * used to pad up to head.length bytes
 572	 */
 573	u8 encoding;
 574
 575	u8 code[0];
 576} __packed;
 577
 578struct p_delay_probe93 {
 579	struct p_header80 head;
 580	u32     seq_num; /* sequence number to match the two probe packets */
 581	u32     offset;  /* usecs the probe got sent after the reference time point */
 582} __packed;
 583
 584/* DCBP: Drbd Compressed Bitmap Packet ... */
 585static inline enum drbd_bitmap_code
 586DCBP_get_code(struct p_compressed_bm *p)
 587{
 588	return (enum drbd_bitmap_code)(p->encoding & 0x0f);
 589}
 590
 591static inline void
 592DCBP_set_code(struct p_compressed_bm *p, enum drbd_bitmap_code code)
 593{
 594	BUG_ON(code & ~0xf);
 595	p->encoding = (p->encoding & ~0xf) | code;
 596}
 597
 598static inline int
 599DCBP_get_start(struct p_compressed_bm *p)
 600{
 601	return (p->encoding & 0x80) != 0;
 602}
 603
 604static inline void
 605DCBP_set_start(struct p_compressed_bm *p, int set)
 606{
 607	p->encoding = (p->encoding & ~0x80) | (set ? 0x80 : 0);
 608}
 609
 610static inline int
 611DCBP_get_pad_bits(struct p_compressed_bm *p)
 612{
 613	return (p->encoding >> 4) & 0x7;
 614}
 615
 616static inline void
 617DCBP_set_pad_bits(struct p_compressed_bm *p, int n)
 618{
 619	BUG_ON(n & ~0x7);
 620	p->encoding = (p->encoding & (~0x7 << 4)) | (n << 4);
 621}
 622
 623/* one bitmap packet, including the p_header,
 624 * should fit within one _architecture independend_ page.
 625 * so we need to use the fixed size 4KiB page size
 626 * most architectures have used for a long time.
 627 */
 628#define BM_PACKET_PAYLOAD_BYTES (4096 - sizeof(struct p_header80))
 629#define BM_PACKET_WORDS (BM_PACKET_PAYLOAD_BYTES/sizeof(long))
 630#define BM_PACKET_VLI_BYTES_MAX (4096 - sizeof(struct p_compressed_bm))
 631#if (PAGE_SIZE < 4096)
 632/* drbd_send_bitmap / receive_bitmap would break horribly */
 633#error "PAGE_SIZE too small"
 634#endif
 635
 636union p_polymorph {
 637        union p_header           header;
 638        struct p_handshake       handshake;
 639        struct p_data            data;
 640        struct p_block_ack       block_ack;
 641        struct p_barrier         barrier;
 642        struct p_barrier_ack     barrier_ack;
 643        struct p_rs_param_89     rs_param_89;
 644        struct p_rs_param_95     rs_param_95;
 645        struct p_protocol        protocol;
 646        struct p_sizes           sizes;
 647        struct p_uuids           uuids;
 648        struct p_state           state;
 649        struct p_req_state       req_state;
 650        struct p_req_state_reply req_state_reply;
 651        struct p_block_req       block_req;
 652	struct p_delay_probe93   delay_probe93;
 653	struct p_rs_uuid         rs_uuid;
 654	struct p_block_desc      block_desc;
 655} __packed;
 656
 657/**********************************************************************/
 658enum drbd_thread_state {
 659	None,
 660	Running,
 661	Exiting,
 662	Restarting
 663};
 664
 665struct drbd_thread {
 666	spinlock_t t_lock;
 667	struct task_struct *task;
 668	struct completion stop;
 669	enum drbd_thread_state t_state;
 670	int (*function) (struct drbd_thread *);
 671	struct drbd_conf *mdev;
 
 672	int reset_cpu_mask;
 
 673};
 674
 675static inline enum drbd_thread_state get_t_state(struct drbd_thread *thi)
 676{
 677	/* THINK testing the t_state seems to be uncritical in all cases
 678	 * (but thread_{start,stop}), so we can read it *without* the lock.
 679	 *	--lge */
 680
 681	smp_rmb();
 682	return thi->t_state;
 683}
 684
 685struct drbd_work;
 686typedef int (*drbd_work_cb)(struct drbd_conf *, struct drbd_work *, int cancel);
 687struct drbd_work {
 688	struct list_head list;
 689	drbd_work_cb cb;
 690};
 691
 692struct drbd_tl_epoch;
 
 
 
 
 
 
 
 
 
 
 
 693struct drbd_request {
 694	struct drbd_work w;
 695	struct drbd_conf *mdev;
 696
 697	/* if local IO is not allowed, will be NULL.
 698	 * if local IO _is_ allowed, holds the locally submitted bio clone,
 699	 * or, after local IO completion, the ERR_PTR(error).
 700	 * see drbd_endio_pri(). */
 701	struct bio *private_bio;
 702
 703	struct hlist_node collision;
 704	sector_t sector;
 705	unsigned int size;
 706	unsigned int epoch; /* barrier_nr */
 707
 708	/* barrier_nr: used to check on "completion" whether this req was in
 709	 * the current epoch, and we therefore have to close it,
 710	 * starting a new epoch...
 
 
 
 
 711	 */
 
 712
 713	struct list_head tl_requests; /* ring list in the transfer log */
 714	struct bio *master_bio;       /* master bio pointer */
 715	unsigned long rq_state; /* see comments above _req_mod() */
 716	int seq_num;
 717	unsigned long start_time;
 718};
 719
 720struct drbd_tl_epoch {
 721	struct drbd_work w;
 722	struct list_head requests; /* requests before */
 723	struct drbd_tl_epoch *next; /* pointer to the next barrier */
 724	unsigned int br_number;  /* the barriers identifier. */
 725	int n_writes;	/* number of requests attached before this barrier */
 726};
 
 
 
 
 
 
 
 
 
 727
 728struct drbd_request;
 729
 730/* These Tl_epoch_entries may be in one of 6 lists:
 731   active_ee .. data packet being written
 732   sync_ee   .. syncer block being written
 733   done_ee   .. block written, need to send P_WRITE_ACK
 734   read_ee   .. [RS]P_DATA_REQUEST being read
 735*/
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 736
 737struct drbd_epoch {
 
 738	struct list_head list;
 739	unsigned int barrier_nr;
 740	atomic_t epoch_size; /* increased on every request added. */
 741	atomic_t active;     /* increased on every req. added, and dec on every finished. */
 742	unsigned long flags;
 743};
 744
 
 
 
 
 745/* drbd_epoch flag bits */
 746enum {
 747	DE_HAVE_BARRIER_NUMBER,
 748};
 749
 750enum epoch_event {
 751	EV_PUT,
 752	EV_GOT_BARRIER_NR,
 753	EV_BECAME_LAST,
 754	EV_CLEANUP = 32, /* used as flag */
 755};
 756
 757struct drbd_wq_barrier {
 758	struct drbd_work w;
 759	struct completion done;
 760};
 761
 762struct digest_info {
 763	int digest_size;
 764	void *digest;
 765};
 766
 767struct drbd_epoch_entry {
 768	struct drbd_work w;
 769	struct hlist_node collision;
 770	struct drbd_epoch *epoch; /* for writes */
 771	struct drbd_conf *mdev;
 772	struct page *pages;
 773	atomic_t pending_bios;
 774	unsigned int size;
 775	/* see comments on ee flag bits below */
 776	unsigned long flags;
 777	sector_t sector;
 778	union {
 779		u64 block_id;
 780		struct digest_info *digest;
 781	};
 782};
 783
 784/* ee flag bits.
 785 * While corresponding bios are in flight, the only modification will be
 786 * set_bit WAS_ERROR, which has to be atomic.
 787 * If no bios are in flight yet, or all have been completed,
 788 * non-atomic modification to ee->flags is ok.
 789 */
 790enum {
 791	__EE_CALL_AL_COMPLETE_IO,
 792	__EE_MAY_SET_IN_SYNC,
 793
 
 
 
 794	/* In case a barrier failed,
 795	 * we need to resubmit without the barrier flag. */
 796	__EE_RESUBMITTED,
 797
 798	/* we may have several bios per epoch entry.
 799	 * if any of those fail, we set this flag atomically
 800	 * from the endio callback */
 801	__EE_WAS_ERROR,
 802
 803	/* This ee has a pointer to a digest instead of a block id */
 804	__EE_HAS_DIGEST,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 805};
 806#define EE_CALL_AL_COMPLETE_IO (1<<__EE_CALL_AL_COMPLETE_IO)
 807#define EE_MAY_SET_IN_SYNC     (1<<__EE_MAY_SET_IN_SYNC)
 808#define	EE_RESUBMITTED         (1<<__EE_RESUBMITTED)
 
 809#define EE_WAS_ERROR           (1<<__EE_WAS_ERROR)
 810#define EE_HAS_DIGEST          (1<<__EE_HAS_DIGEST)
 
 
 
 
 
 
 
 
 811
 812/* global flag bits */
 813enum {
 814	CREATE_BARRIER,		/* next P_DATA is preceded by a P_BARRIER */
 815	SIGNAL_ASENDER,		/* whether asender wants to be interrupted */
 816	SEND_PING,		/* whether asender should send a ping asap */
 817
 818	UNPLUG_QUEUED,		/* only relevant with kernel 2.4 */
 819	UNPLUG_REMOTE,		/* sending a "UnplugRemote" could help */
 820	MD_DIRTY,		/* current uuids and flags not yet on disk */
 821	DISCARD_CONCURRENT,	/* Set on one node, cleared on the peer! */
 822	USE_DEGR_WFC_T,		/* degr-wfc-timeout instead of wfc-timeout. */
 823	CLUSTER_ST_CHANGE,	/* Cluster wide state change going on... */
 824	CL_ST_CHG_SUCCESS,
 825	CL_ST_CHG_FAIL,
 826	CRASHED_PRIMARY,	/* This node was a crashed primary.
 827				 * Gets cleared when the state.conn
 828				 * goes into C_CONNECTED state. */
 829	NO_BARRIER_SUPP,	/* underlying block device doesn't implement barriers */
 830	CONSIDER_RESYNC,
 831
 832	MD_NO_FUA,		/* Users wants us to not use FUA/FLUSH on meta data dev */
 833	SUSPEND_IO,		/* suspend application io */
 834	BITMAP_IO,		/* suspend application io;
 835				   once no more io in flight, start bitmap io */
 836	BITMAP_IO_QUEUED,       /* Started bitmap IO */
 837	GO_DISKLESS,		/* Disk is being detached, on io-error or admin request. */
 838	WAS_IO_ERROR,		/* Local disk failed returned IO error */
 
 839	RESYNC_AFTER_NEG,       /* Resync after online grow after the attach&negotiate finished. */
 840	NET_CONGESTED,		/* The data socket is congested */
 841
 842	CONFIG_PENDING,		/* serialization of (re)configuration requests.
 843				 * if set, also prevents the device from dying */
 844	DEVICE_DYING,		/* device became unconfigured,
 845				 * but worker thread is still handling the cleanup.
 846				 * reconfiguring (nl_disk_conf, nl_net_conf) is dissalowed,
 847				 * while this is set. */
 848	RESIZE_PENDING,		/* Size change detected locally, waiting for the response from
 849				 * the peer, if it changed there as well. */
 850	CONN_DRY_RUN,		/* Expect disconnect after resync handshake. */
 851	GOT_PING_ACK,		/* set when we receive a ping_ack packet, misc wait gets woken */
 852	NEW_CUR_UUID,		/* Create new current UUID when thawing IO */
 853	AL_SUSPENDED,		/* Activity logging is currently suspended. */
 854	AHEAD_TO_SYNC_SOURCE,   /* Ahead -> SyncSource queued */
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 855};
 856
 857struct drbd_bitmap; /* opaque for drbd_conf */
 858
 859/* definition of bits in bm_flags to be used in drbd_bm_lock
 860 * and drbd_bitmap_io and friends. */
 861enum bm_flag {
 862	/* do we need to kfree, or vfree bm_pages? */
 863	BM_P_VMALLOCED = 0x10000, /* internal use only, will be masked out */
 864
 865	/* currently locked for bulk operation */
 866	BM_LOCKED_MASK = 0x7,
 867
 868	/* in detail, that is: */
 869	BM_DONT_CLEAR = 0x1,
 870	BM_DONT_SET   = 0x2,
 871	BM_DONT_TEST  = 0x4,
 872
 
 
 
 
 873	/* (test bit, count bit) allowed (common case) */
 874	BM_LOCKED_TEST_ALLOWED = 0x3,
 875
 876	/* testing bits, as well as setting new bits allowed, but clearing bits
 877	 * would be unexpected.  Used during bitmap receive.  Setting new bits
 878	 * requires sending of "out-of-sync" information, though. */
 879	BM_LOCKED_SET_ALLOWED = 0x1,
 880
 881	/* clear is not expected while bitmap is locked for bulk operation */
 
 
 882};
 883
 884
 885/* TODO sort members for performance
 886 * MAYBE group them further */
 887
 888/* THINK maybe we actually want to use the default "event/%s" worker threads
 889 * or similar in linux 2.6, which uses per cpu data and threads.
 890 */
 891struct drbd_work_queue {
 892	struct list_head q;
 893	struct semaphore s; /* producers up it, worker down()s it */
 894	spinlock_t q_lock;  /* to protect the list. */
 
 895};
 896
 897struct drbd_socket {
 898	struct drbd_work_queue work;
 899	struct mutex mutex;
 900	struct socket    *socket;
 901	/* this way we get our
 902	 * send/receive buffers off the stack */
 903	union p_polymorph sbuf;
 904	union p_polymorph rbuf;
 905};
 906
 907struct drbd_md {
 908	u64 md_offset;		/* sector offset to 'super' block */
 909
 910	u64 la_size_sect;	/* last agreed size, unit sectors */
 
 911	u64 uuid[UI_SIZE];
 912	u64 device_uuid;
 913	u32 flags;
 914	u32 md_size_sect;
 915
 916	s32 al_offset;	/* signed relative sector offset to al area */
 917	s32 bm_offset;	/* signed relative sector offset to bitmap */
 918
 919	/* u32 al_nr_extents;	   important for restoring the AL
 920	 * is stored into  sync_conf.al_extents, which in turn
 921	 * gets applied to act_log->nr_elements
 922	 */
 923};
 924
 925/* for sync_conf and other types... */
 926#define NL_PACKET(name, number, fields) struct name { fields };
 927#define NL_INTEGER(pn,pr,member) int member;
 928#define NL_INT64(pn,pr,member) __u64 member;
 929#define NL_BIT(pn,pr,member)   unsigned member:1;
 930#define NL_STRING(pn,pr,member,len) unsigned char member[len]; int member ## _len;
 931#include "linux/drbd_nl.h"
 932
 933struct drbd_backing_dev {
 934	struct block_device *backing_bdev;
 935	struct block_device *md_bdev;
 936	struct drbd_md md;
 937	struct disk_conf dc; /* The user provided config... */
 938	sector_t known_size; /* last known size of that backing device */
 939};
 940
 941struct drbd_md_io {
 942	struct drbd_conf *mdev;
 943	struct completion event;
 
 
 
 
 944	int error;
 945};
 946
 947struct bm_io_work {
 948	struct drbd_work w;
 949	char *why;
 950	enum bm_flag flags;
 951	int (*io_fn)(struct drbd_conf *mdev);
 952	void (*done)(struct drbd_conf *mdev, int rv);
 953};
 954
 955enum write_ordering_e {
 956	WO_none,
 957	WO_drain_io,
 958	WO_bdev_flush,
 959};
 960
 961struct fifo_buffer {
 962	int *values;
 963	unsigned int head_index;
 964	unsigned int size;
 
 
 965};
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 966
 967struct drbd_conf {
 968	/* things that are stored as / read from meta data on disk */
 969	unsigned long flags;
 970
 971	/* configured by drbdsetup */
 972	struct net_conf *net_conf; /* protected by get_net_conf() and put_net_conf() */
 973	struct syncer_conf sync_conf;
 974	struct drbd_backing_dev *ldev __protected_by(local);
 975
 976	sector_t p_size;     /* partner's disk size */
 977	struct request_queue *rq_queue;
 978	struct block_device *this_bdev;
 979	struct gendisk	    *vdisk;
 980
 981	struct drbd_socket data; /* data/barrier/cstate/parameter packets */
 982	struct drbd_socket meta; /* ping/ack (metadata) packets */
 983	int agreed_pro_version;  /* actually used protocol version */
 984	unsigned long last_received; /* in jiffies, either socket */
 985	unsigned int ko_count;
 986	struct drbd_work  resync_work,
 987			  unplug_work,
 988			  go_diskless,
 989			  md_sync_work,
 990			  start_resync_work;
 991	struct timer_list resync_timer;
 992	struct timer_list md_sync_timer;
 993	struct timer_list start_resync_timer;
 994	struct timer_list request_timer;
 995#ifdef DRBD_DEBUG_MD_SYNC
 996	struct {
 997		unsigned int line;
 998		const char* func;
 999	} last_md_mark_dirty;
1000#endif
1001
1002	/* Used after attach while negotiating new disk state. */
1003	union drbd_state new_state_tmp;
1004
1005	union drbd_state state;
1006	wait_queue_head_t misc_wait;
1007	wait_queue_head_t state_wait;  /* upon each state change. */
1008	wait_queue_head_t net_cnt_wait;
1009	unsigned int send_cnt;
1010	unsigned int recv_cnt;
1011	unsigned int read_cnt;
1012	unsigned int writ_cnt;
1013	unsigned int al_writ_cnt;
1014	unsigned int bm_writ_cnt;
1015	atomic_t ap_bio_cnt;	 /* Requests we need to complete */
 
1016	atomic_t ap_pending_cnt; /* AP data packets on the wire, ack expected */
1017	atomic_t rs_pending_cnt; /* RS request/data packets on the wire */
1018	atomic_t unacked_cnt;	 /* Need to send replys for */
1019	atomic_t local_cnt;	 /* Waiting for local completion */
1020	atomic_t net_cnt;	 /* Users of net_conf */
1021	spinlock_t req_lock;
1022	struct drbd_tl_epoch *unused_spare_tle; /* for pre-allocation */
1023	struct drbd_tl_epoch *newest_tle;
1024	struct drbd_tl_epoch *oldest_tle;
1025	struct list_head out_of_sequence_requests;
1026	struct hlist_head *tl_hash;
1027	unsigned int tl_hash_s;
1028
 
 
 
 
 
 
 
 
 
 
 
1029	/* blocks to resync in this run [unit BM_BLOCK_SIZE] */
1030	unsigned long rs_total;
1031	/* number of resync blocks that failed in this run */
1032	unsigned long rs_failed;
1033	/* Syncer's start time [unit jiffies] */
1034	unsigned long rs_start;
1035	/* cumulated time in PausedSyncX state [unit jiffies] */
1036	unsigned long rs_paused;
1037	/* skipped because csum was equal [unit BM_BLOCK_SIZE] */
1038	unsigned long rs_same_csum;
1039#define DRBD_SYNC_MARKS 8
1040#define DRBD_SYNC_MARK_STEP (3*HZ)
1041	/* block not up-to-date at mark [unit BM_BLOCK_SIZE] */
1042	unsigned long rs_mark_left[DRBD_SYNC_MARKS];
1043	/* marks's time [unit jiffies] */
1044	unsigned long rs_mark_time[DRBD_SYNC_MARKS];
1045	/* current index into rs_mark_{left,time} */
1046	int rs_last_mark;
 
1047
1048	/* where does the admin want us to start? (sector) */
1049	sector_t ov_start_sector;
 
1050	/* where are we now? (sector) */
1051	sector_t ov_position;
1052	/* Start sector of out of sync range (to merge printk reporting). */
1053	sector_t ov_last_oos_start;
1054	/* size of out-of-sync range in sectors. */
1055	sector_t ov_last_oos_size;
1056	unsigned long ov_left; /* in bits */
1057	struct crypto_hash *csums_tfm;
1058	struct crypto_hash *verify_tfm;
1059
1060	struct drbd_thread receiver;
1061	struct drbd_thread worker;
1062	struct drbd_thread asender;
1063	struct drbd_bitmap *bitmap;
1064	unsigned long bm_resync_fo; /* bit offset for drbd_bm_find_next */
1065
1066	/* Used to track operations of resync... */
1067	struct lru_cache *resync;
1068	/* Number of locked elements in resync LRU */
1069	unsigned int resync_locked;
1070	/* resync extent number waiting for application requests */
1071	unsigned int resync_wenr;
1072
1073	int open_cnt;
1074	u64 *p_uuid;
1075	struct drbd_epoch *current_epoch;
1076	spinlock_t epoch_lock;
1077	unsigned int epochs;
1078	enum write_ordering_e write_ordering;
1079	struct list_head active_ee; /* IO in progress (P_DATA gets written to disk) */
1080	struct list_head sync_ee;   /* IO in progress (P_RS_DATA_REPLY gets written to disk) */
1081	struct list_head done_ee;   /* send ack */
1082	struct list_head read_ee;   /* IO in progress (any read) */
1083	struct list_head net_ee;    /* zero-copy network send in progress */
1084	struct hlist_head *ee_hash; /* is proteced by req_lock! */
1085	unsigned int ee_hash_s;
1086
1087	/* this one is protected by ee_lock, single thread */
1088	struct drbd_epoch_entry *last_write_w_barrier;
1089
1090	int next_barrier_nr;
1091	struct hlist_head *app_reads_hash; /* is proteced by req_lock */
1092	struct list_head resync_reads;
1093	atomic_t pp_in_use;		/* allocated from page pool */
1094	atomic_t pp_in_use_by_net;	/* sendpage()d, still referenced by tcp */
1095	wait_queue_head_t ee_wait;
1096	struct page *md_io_page;	/* one page buffer for md_io */
1097	struct page *md_io_tmpp;	/* for logical_block_size != 512 */
1098	struct mutex md_io_mutex;	/* protects the md_io_buffer */
1099	spinlock_t al_lock;
1100	wait_queue_head_t al_wait;
1101	struct lru_cache *act_log;	/* activity log */
1102	unsigned int al_tr_number;
1103	int al_tr_cycle;
1104	int al_tr_pos;   /* position of the next transaction in the journal */
1105	struct crypto_hash *cram_hmac_tfm;
1106	struct crypto_hash *integrity_w_tfm; /* to be used by the worker thread */
1107	struct crypto_hash *integrity_r_tfm; /* to be used by the receiver thread */
1108	void *int_dig_out;
1109	void *int_dig_in;
1110	void *int_dig_vv;
1111	wait_queue_head_t seq_wait;
1112	atomic_t packet_seq;
1113	unsigned int peer_seq;
1114	spinlock_t peer_seq_lock;
1115	unsigned int minor;
1116	unsigned long comm_bm_set; /* communicated number of set bits. */
1117	cpumask_var_t cpu_mask;
1118	struct bm_io_work bm_io_work;
1119	u64 ed_uuid; /* UUID of the exposed data */
1120	struct mutex state_mutex;
 
1121	char congestion_reason;  /* Why we where congested... */
1122	atomic_t rs_sect_in; /* for incoming resync data rate, SyncTarget */
1123	atomic_t rs_sect_ev; /* for submitted resync data rate, both */
1124	int rs_last_sect_ev; /* counter to compare with */
1125	int rs_last_events;  /* counter of read or write "events" (unit sectors)
1126			      * on the lower level device when we last looked. */
1127	int c_sync_rate; /* current resync rate after syncer throttle magic */
1128	struct fifo_buffer rs_plan_s; /* correction values of resync planer */
1129	int rs_in_flight; /* resync sectors in flight (to proxy, in proxy and from proxy) */
1130	int rs_planed;    /* resync sectors already planned */
1131	atomic_t ap_in_flight; /* App sectors in flight (waiting for ack) */
1132	int peer_max_bio_size;
1133	int local_max_bio_size;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1134};
1135
1136static inline struct drbd_conf *minor_to_mdev(unsigned int minor)
1137{
1138	struct drbd_conf *mdev;
 
 
 
 
 
 
 
 
1139
1140	mdev = minor < minor_count ? minor_table[minor] : NULL;
 
 
 
 
 
 
 
 
1141
1142	return mdev;
 
 
1143}
1144
1145static inline unsigned int mdev_to_minor(struct drbd_conf *mdev)
1146{
1147	return mdev->minor;
1148}
1149
1150/* returns 1 if it was successful,
1151 * returns 0 if there was no data socket.
1152 * so wherever you are going to use the data.socket, e.g. do
1153 * if (!drbd_get_data_sock(mdev))
1154 *	return 0;
1155 *	CODE();
1156 * drbd_put_data_sock(mdev);
1157 */
1158static inline int drbd_get_data_sock(struct drbd_conf *mdev)
1159{
1160	mutex_lock(&mdev->data.mutex);
1161	/* drbd_disconnect() could have called drbd_free_sock()
1162	 * while we were waiting in down()... */
1163	if (unlikely(mdev->data.socket == NULL)) {
1164		mutex_unlock(&mdev->data.mutex);
1165		return 0;
1166	}
1167	return 1;
1168}
1169
1170static inline void drbd_put_data_sock(struct drbd_conf *mdev)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1171{
1172	mutex_unlock(&mdev->data.mutex);
1173}
1174
1175/*
1176 * function declarations
1177 *************************/
1178
1179/* drbd_main.c */
1180
/*
 * Flag bits for the state change functions (drbd_change_state() and
 * friends take an "enum chg_state_flags" argument).
 * CS_ORDERED is the combination of CS_WAIT_COMPLETE and CS_SERIALIZE.
 */
enum chg_state_flags {
	CS_HARD          = 1 << 0,
	CS_VERBOSE       = 1 << 1,
	CS_WAIT_COMPLETE = 1 << 2,
	CS_SERIALIZE     = 1 << 3,
	CS_ORDERED       = CS_WAIT_COMPLETE | CS_SERIALIZE,
};
1188
/*
 * Flag bits taken by drbd_send_sizes() and drbd_determine_dev_size().
 */
enum dds_flags {
	DDSF_FORCED    = 1 << 0,
	/* Do not run a resync for the new space */
	DDSF_NO_RESYNC = 1 << 1,
};
1193
/* state changes and thread start/stop */
void drbd_init_set_defaults(struct drbd_conf *mdev);
enum drbd_state_rv drbd_change_state(struct drbd_conf *mdev,
				     enum chg_state_flags f,
				     union drbd_state mask,
				     union drbd_state val);
void drbd_force_state(struct drbd_conf *, union drbd_state, union drbd_state);
enum drbd_state_rv _drbd_request_state(struct drbd_conf *,
				       union drbd_state,
				       union drbd_state,
				       enum chg_state_flags);
enum drbd_state_rv __drbd_set_state(struct drbd_conf *, union drbd_state,
				    enum chg_state_flags,
				    struct completion *done);
void print_st_err(struct drbd_conf *, union drbd_state, union drbd_state, int);
int drbd_thread_start(struct drbd_thread *thi);
void _drbd_thread_stop(struct drbd_thread *thi, int restart, int wait);

/* Real functions only with CONFIG_SMP; otherwise both expand to an empty
 * statement expression. */
#ifdef CONFIG_SMP
void drbd_thread_current_set_cpu(struct drbd_conf *mdev);
void drbd_calc_cpu_mask(struct drbd_conf *mdev);
#else
# define drbd_thread_current_set_cpu(A) ({})
# define drbd_calc_cpu_mask(A) ({})
#endif
1219extern void drbd_free_resources(struct drbd_conf *mdev);
1220extern void tl_release(struct drbd_conf *mdev, unsigned int barrier_nr,
1221		       unsigned int set_size);
1222extern void tl_clear(struct drbd_conf *mdev);
1223extern void _tl_add_barrier(struct drbd_conf *, struct drbd_tl_epoch *);
1224extern void drbd_free_sock(struct drbd_conf *mdev);
1225extern int drbd_send(struct drbd_conf *mdev, struct socket *sock,
1226			void *buf, size_t size, unsigned msg_flags);
1227extern int drbd_send_protocol(struct drbd_conf *mdev);
1228extern int drbd_send_uuids(struct drbd_conf *mdev);
1229extern int drbd_send_uuids_skip_initial_sync(struct drbd_conf *mdev);
1230extern int drbd_gen_and_send_sync_uuid(struct drbd_conf *mdev);
1231extern int drbd_send_sizes(struct drbd_conf *mdev, int trigger_reply, enum dds_flags flags);
1232extern int _drbd_send_state(struct drbd_conf *mdev);
1233extern int drbd_send_state(struct drbd_conf *mdev);
1234extern int _drbd_send_cmd(struct drbd_conf *mdev, struct socket *sock,
1235			enum drbd_packets cmd, struct p_header80 *h,
1236			size_t size, unsigned msg_flags);
1237#define USE_DATA_SOCKET 1
1238#define USE_META_SOCKET 0
1239extern int drbd_send_cmd(struct drbd_conf *mdev, int use_data_socket,
1240			enum drbd_packets cmd, struct p_header80 *h,
1241			size_t size);
1242extern int drbd_send_cmd2(struct drbd_conf *mdev, enum drbd_packets cmd,
1243			char *data, size_t size);
1244extern int drbd_send_sync_param(struct drbd_conf *mdev, struct syncer_conf *sc);
1245extern int drbd_send_b_ack(struct drbd_conf *mdev, u32 barrier_nr,
1246			u32 set_size);
1247extern int drbd_send_ack(struct drbd_conf *mdev, enum drbd_packets cmd,
1248			struct drbd_epoch_entry *e);
1249extern int drbd_send_ack_rp(struct drbd_conf *mdev, enum drbd_packets cmd,
1250			struct p_block_req *rp);
1251extern int drbd_send_ack_dp(struct drbd_conf *mdev, enum drbd_packets cmd,
1252			struct p_data *dp, int data_size);
1253extern int drbd_send_ack_ex(struct drbd_conf *mdev, enum drbd_packets cmd,
1254			    sector_t sector, int blksize, u64 block_id);
1255extern int drbd_send_oos(struct drbd_conf *mdev, struct drbd_request *req);
1256extern int drbd_send_block(struct drbd_conf *mdev, enum drbd_packets cmd,
1257			   struct drbd_epoch_entry *e);
1258extern int drbd_send_dblock(struct drbd_conf *mdev, struct drbd_request *req);
1259extern int drbd_send_drequest(struct drbd_conf *mdev, int cmd,
1260			      sector_t sector, int size, u64 block_id);
1261extern int drbd_send_drequest_csum(struct drbd_conf *mdev,
1262				   sector_t sector,int size,
1263				   void *digest, int digest_size,
1264				   enum drbd_packets cmd);
1265extern int drbd_send_ov_request(struct drbd_conf *mdev,sector_t sector,int size);
1266
1267extern int drbd_send_bitmap(struct drbd_conf *mdev);
1268extern int _drbd_send_bitmap(struct drbd_conf *mdev);
1269extern int drbd_send_sr_reply(struct drbd_conf *mdev, enum drbd_state_rv retcode);
1270extern void drbd_free_bc(struct drbd_backing_dev *ldev);
1271extern void drbd_mdev_cleanup(struct drbd_conf *mdev);
1272void drbd_print_uuids(struct drbd_conf *mdev, const char *text);
1273
1274extern void drbd_md_sync(struct drbd_conf *mdev);
1275extern int  drbd_md_read(struct drbd_conf *mdev, struct drbd_backing_dev *bdev);
1276extern void drbd_uuid_set(struct drbd_conf *mdev, int idx, u64 val) __must_hold(local);
1277extern void _drbd_uuid_set(struct drbd_conf *mdev, int idx, u64 val) __must_hold(local);
1278extern void drbd_uuid_new_current(struct drbd_conf *mdev) __must_hold(local);
1279extern void _drbd_uuid_new_current(struct drbd_conf *mdev) __must_hold(local);
1280extern void drbd_uuid_set_bm(struct drbd_conf *mdev, u64 val) __must_hold(local);
1281extern void drbd_md_set_flag(struct drbd_conf *mdev, int flags) __must_hold(local);
1282extern void drbd_md_clear_flag(struct drbd_conf *mdev, int flags)__must_hold(local);
 
 
 
 
1283extern int drbd_md_test_flag(struct drbd_backing_dev *, int);
1284#ifndef DRBD_DEBUG_MD_SYNC
1285extern void drbd_md_mark_dirty(struct drbd_conf *mdev);
1286#else
1287#define drbd_md_mark_dirty(m)	drbd_md_mark_dirty_(m, __LINE__ , __func__ )
1288extern void drbd_md_mark_dirty_(struct drbd_conf *mdev,
1289		unsigned int line, const char *func);
1290#endif
1291extern void drbd_queue_bitmap_io(struct drbd_conf *mdev,
1292				 int (*io_fn)(struct drbd_conf *),
1293				 void (*done)(struct drbd_conf *, int),
1294				 char *why, enum bm_flag flags);
1295extern int drbd_bitmap_io(struct drbd_conf *mdev,
1296		int (*io_fn)(struct drbd_conf *),
1297		char *why, enum bm_flag flags);
1298extern int drbd_bmio_set_n_write(struct drbd_conf *mdev);
1299extern int drbd_bmio_clear_n_write(struct drbd_conf *mdev);
1300extern void drbd_go_diskless(struct drbd_conf *mdev);
1301extern void drbd_ldev_destroy(struct drbd_conf *mdev);
1302
1303
/*
 * Meta data layout
 *
 * We reserve a 128MB Block (4k aligned)
 *  - either at the end of the backing device
 *  - or on a separate meta data device.
 */

#define MD_RESERVED_SECT (128LU << 11)	/* 128 MB, unit sectors */

/* The following numbers are sectors */
#define MD_AL_OFFSET	8	/* 8 Sectors after start of meta area */
#define MD_AL_MAX_SIZE	64	/* = 32 kb LOG  ~ 3776 extents ~ 14 GB Storage */
/* Allows up to about 3.8TB */
#define MD_BM_OFFSET	(MD_AL_OFFSET + MD_AL_MAX_SIZE)

/* Since the smallest IO unit is usually 512 byte */
#define MD_SECTOR_SHIFT	9
#define MD_SECTOR_SIZE	(1 << MD_SECTOR_SHIFT)

/* activity log */
#define AL_EXTENTS_PT	((MD_SECTOR_SIZE - 12) / 8 - 1)	/* 61 ; Extents per 512B sector */
#define AL_EXTENT_SHIFT	22	/* One extent represents 4M Storage */
#define AL_EXTENT_SIZE	(1 << AL_EXTENT_SHIFT)
1324
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1325#if BITS_PER_LONG == 32
1326#define LN2_BPL 5
1327#define cpu_to_lel(A) cpu_to_le32(A)
1328#define lel_to_cpu(A) le32_to_cpu(A)
1329#elif BITS_PER_LONG == 64
1330#define LN2_BPL 6
1331#define cpu_to_lel(A) cpu_to_le64(A)
1332#define lel_to_cpu(A) le64_to_cpu(A)
1333#else
1334#error "LN2 of BITS_PER_LONG unknown!"
1335#endif
1336
1337/* resync bitmap */
1338/* 16MB sized 'bitmap extent' to track syncer usage */
1339struct bm_extent {
1340	int rs_left; /* number of bits set (out of sync) in this extent. */
1341	int rs_failed; /* number of failed resync requests in this extent. */
1342	unsigned long flags;
1343	struct lc_element lce;
1344};
1345
/* values for bm_extent.flags */
#define BME_NO_WRITES	0	/* no more requests on this one! */
#define BME_LOCKED	1	/* syncer active on this one. */
#define BME_PRIORITY	2	/* finish resync IO on this extent ASAP! App IO waiting! */
1349
1350/* drbd_bitmap.c */
1351/*
1352 * We need to store one bit for a block.
1353 * Example: 1GB disk @ 4096 byte blocks ==> we need 32 KB bitmap.
1354 * Bit 0 ==> local node thinks this block is binary identical on both nodes
1355 * Bit 1 ==> local node thinks this block needs to be synced.
1356 */
1357
1358#define SLEEP_TIME (HZ/10)
1359
1360#define BM_BLOCK_SHIFT  12			 /* 4k per bit */
 
 
1361#define BM_BLOCK_SIZE	 (1<<BM_BLOCK_SHIFT)
1362/* (9+3) : 512 bytes @ 8 bits; representing 16M storage
1363 * per sector of on disk bitmap */
1364#define BM_EXT_SHIFT	 (BM_BLOCK_SHIFT + MD_SECTOR_SHIFT + 3)  /* = 24 */
 
1365#define BM_EXT_SIZE	 (1<<BM_EXT_SHIFT)
1366
1367#if (BM_EXT_SHIFT != 24) || (BM_BLOCK_SHIFT != 12)
1368#error "HAVE YOU FIXED drbdmeta AS WELL??"
1369#endif
1370
/* thus many _storage_ sectors are described by one bit */
#define BM_SECT_TO_BIT(x)	((x) >> (BM_BLOCK_SHIFT - 9))
#define BM_BIT_TO_SECT(x)	((sector_t)(x) << (BM_BLOCK_SHIFT - 9))
#define BM_SECT_PER_BIT		BM_BIT_TO_SECT(1)

/* bit to represented kilo byte conversion */
#define Bit2KB(bits)		((bits) << (BM_BLOCK_SHIFT - 10))

/* in which _bitmap_ extent (resp. sector) the bit for a certain
 * _storage_ sector is located in */
#define BM_SECT_TO_EXT(x)	((x) >> (BM_EXT_SHIFT - 9))

/* how much _storage_ sectors we have per bitmap sector */
#define BM_EXT_TO_SECT(x)	((sector_t)(x) << (BM_EXT_SHIFT - 9))
#define BM_SECT_PER_EXT		BM_EXT_TO_SECT(1)

/* in one sector of the bitmap, we have this many activity_log extents. */
#define AL_EXT_PER_BM_SECT	(1 << (BM_EXT_SHIFT - AL_EXTENT_SHIFT))
#define BM_WORDS_PER_AL_EXT	(1 << (AL_EXTENT_SHIFT - BM_BLOCK_SHIFT - LN2_BPL))

#define BM_BLOCKS_PER_BM_EXT_B		(BM_EXT_SHIFT - BM_BLOCK_SHIFT)
#define BM_BLOCKS_PER_BM_EXT_MASK	((1 << BM_BLOCKS_PER_BM_EXT_B) - 1)

/* the extent in "PER_EXTENT" below is an activity log extent
 * we need that many (long words/bytes) to store the bitmap
 *		     of one AL_EXTENT_SIZE chunk of storage.
 * we can store the bitmap for that many AL_EXTENTS within
 * one sector of the _on_disk_ bitmap:
 * bit	 0	  bit 37   bit 38	     bit (512*8)-1
 *	     ...|........|........|.. // ..|........|
 * sect. 0	 `296	  `304			   ^(512*8*8)-1
 *
#define BM_WORDS_PER_EXT    ( (AL_EXT_SIZE/BM_BLOCK_SIZE) / BITS_PER_LONG )
#define BM_BYTES_PER_EXT    ( (AL_EXT_SIZE/BM_BLOCK_SIZE) / 8 )  // 128
#define BM_EXT_PER_SECT	    ( 512 / BM_BYTES_PER_EXTENT )	 //   4
 */
1407
/*
 * Upper bounds for the device size, in sectors.
 * DRBD_MAX_SECTORS_32 is the largest 32 bit sector number,
 * DRBD_MAX_SECTORS_BM is what the reserved on-disk bitmap area can describe.
 */
#define DRBD_MAX_SECTORS_32 (0xffffffffLU)
#define DRBD_MAX_SECTORS_BM \
	  ((MD_RESERVED_SECT - MD_BM_OFFSET) * (1LL << (BM_EXT_SHIFT - 9)))

#if DRBD_MAX_SECTORS_BM < DRBD_MAX_SECTORS_32
# define DRBD_MAX_SECTORS      DRBD_MAX_SECTORS_BM
# define DRBD_MAX_SECTORS_FLEX DRBD_MAX_SECTORS_BM
#elif !defined(CONFIG_LBDAF) && BITS_PER_LONG == 32
# define DRBD_MAX_SECTORS      DRBD_MAX_SECTORS_32
# define DRBD_MAX_SECTORS_FLEX DRBD_MAX_SECTORS_32
#else
# define DRBD_MAX_SECTORS      DRBD_MAX_SECTORS_BM
/* 16 TB in units of sectors */
# if BITS_PER_LONG == 32
/* adjust by one page worth of bitmap,
 * so we won't wrap around in drbd_bm_find_next_bit.
 * you should use 64bit OS for that much storage, anyways. */
#  define DRBD_MAX_SECTORS_FLEX BM_BIT_TO_SECT(0xffff7fff)
# else
/* we allow up to 1 PiB now on 64bit architecture with "flexible" meta data */
#  define DRBD_MAX_SECTORS_FLEX (1UL << 51)
/* corresponds to (1UL << 38) bits right now. */
# endif
#endif
1431
/*
 * Sector shift value for the "hash" functions of tl_hash and ee_hash tables.
 * With a value of 8 all IO in one 128K block make it to the same slot of the
 * hash table.
 */
#define HT_SHIFT 8
#define DRBD_MAX_BIO_SIZE	(1U << (9 + HT_SHIFT))
#define DRBD_MAX_BIO_SIZE_SAFE	(1 << 12)	/* Works always = 4k */

/* The old header only allows packets up to 32 KiB of data */
#define DRBD_MAX_SIZE_H80_PACKET (1 << 15)

/* Number of elements in the app_reads_hash */
#define APP_R_HSIZE 15
1443
1444extern int  drbd_bm_init(struct drbd_conf *mdev);
1445extern int  drbd_bm_resize(struct drbd_conf *mdev, sector_t sectors, int set_new_bits);
1446extern void drbd_bm_cleanup(struct drbd_conf *mdev);
1447extern void drbd_bm_set_all(struct drbd_conf *mdev);
1448extern void drbd_bm_clear_all(struct drbd_conf *mdev);
 
 
 
 
 
 
 
 
 
1449/* set/clear/test only a few bits at a time */
1450extern int  drbd_bm_set_bits(
1451		struct drbd_conf *mdev, unsigned long s, unsigned long e);
1452extern int  drbd_bm_clear_bits(
1453		struct drbd_conf *mdev, unsigned long s, unsigned long e);
1454extern int drbd_bm_count_bits(
1455	struct drbd_conf *mdev, const unsigned long s, const unsigned long e);
1456/* bm_set_bits variant for use while holding drbd_bm_lock,
1457 * may process the whole bitmap in one go */
1458extern void _drbd_bm_set_bits(struct drbd_conf *mdev,
1459		const unsigned long s, const unsigned long e);
1460extern int  drbd_bm_test_bit(struct drbd_conf *mdev, unsigned long bitnr);
1461extern int  drbd_bm_e_weight(struct drbd_conf *mdev, unsigned long enr);
1462extern int  drbd_bm_write_page(struct drbd_conf *mdev, unsigned int idx) __must_hold(local);
1463extern int  drbd_bm_read(struct drbd_conf *mdev) __must_hold(local);
1464extern int  drbd_bm_write(struct drbd_conf *mdev) __must_hold(local);
1465extern unsigned long drbd_bm_ALe_set_all(struct drbd_conf *mdev,
1466		unsigned long al_enr);
1467extern size_t	     drbd_bm_words(struct drbd_conf *mdev);
1468extern unsigned long drbd_bm_bits(struct drbd_conf *mdev);
1469extern sector_t      drbd_bm_capacity(struct drbd_conf *mdev);
 
 
 
1470
1471#define DRBD_END_OF_BITMAP	(~(unsigned long)0)
1472extern unsigned long drbd_bm_find_next(struct drbd_conf *mdev, unsigned long bm_fo);
1473/* bm_find_next variants for use while you hold drbd_bm_lock() */
1474extern unsigned long _drbd_bm_find_next(struct drbd_conf *mdev, unsigned long bm_fo);
1475extern unsigned long _drbd_bm_find_next_zero(struct drbd_conf *mdev, unsigned long bm_fo);
1476extern unsigned long _drbd_bm_total_weight(struct drbd_conf *mdev);
1477extern unsigned long drbd_bm_total_weight(struct drbd_conf *mdev);
1478extern int drbd_bm_rs_done(struct drbd_conf *mdev);
1479/* for receive_bitmap */
1480extern void drbd_bm_merge_lel(struct drbd_conf *mdev, size_t offset,
1481		size_t number, unsigned long *buffer);
1482/* for _drbd_send_bitmap */
1483extern void drbd_bm_get_lel(struct drbd_conf *mdev, size_t offset,
1484		size_t number, unsigned long *buffer);
1485
1486extern void drbd_bm_lock(struct drbd_conf *mdev, char *why, enum bm_flag flags);
1487extern void drbd_bm_unlock(struct drbd_conf *mdev);
1488/* drbd_main.c */
1489
1490extern struct kmem_cache *drbd_request_cache;
1491extern struct kmem_cache *drbd_ee_cache;	/* epoch entries */
1492extern struct kmem_cache *drbd_bm_ext_cache;	/* bitmap extents */
1493extern struct kmem_cache *drbd_al_ext_cache;	/* activity log extents */
1494extern mempool_t *drbd_request_mempool;
1495extern mempool_t *drbd_ee_mempool;
1496
1497extern struct page *drbd_pp_pool; /* drbd's page pool */
 
 
 
 
 
 
 
 
 
 
 
 
 
1498extern spinlock_t   drbd_pp_lock;
1499extern int	    drbd_pp_vacant;
1500extern wait_queue_head_t drbd_pp_wait;
1501
1502extern rwlock_t global_state_lock;
1503
1504extern struct drbd_conf *drbd_new_device(unsigned int minor);
1505extern void drbd_free_mdev(struct drbd_conf *mdev);
1506
1507extern int proc_details;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1508
1509/* drbd_req */
1510extern int drbd_make_request(struct request_queue *q, struct bio *bio);
1511extern int drbd_read_remote(struct drbd_conf *mdev, struct drbd_request *req);
1512extern int drbd_merge_bvec(struct request_queue *q, struct bvec_merge_data *bvm, struct bio_vec *bvec);
 
1513extern int is_valid_ar_handle(struct drbd_request *, sector_t);
1514
1515
1516/* drbd_nl.c */
1517extern void drbd_suspend_io(struct drbd_conf *mdev);
1518extern void drbd_resume_io(struct drbd_conf *mdev);
 
 
 
1519extern char *ppsize(char *buf, unsigned long long size);
1520extern sector_t drbd_new_dev_size(struct drbd_conf *, struct drbd_backing_dev *, int);
/* Result of drbd_determine_dev_size(). */
enum determine_dev_size {
	dev_size_error = -1,
	unchanged      = 0,
	shrunk         = 1,
	grew           = 2,
};
1522extern enum determine_dev_size drbd_determine_dev_size(struct drbd_conf *, enum dds_flags) __must_hold(local);
1523extern void resync_after_online_grow(struct drbd_conf *);
1524extern void drbd_reconsider_max_bio_size(struct drbd_conf *mdev);
1525extern enum drbd_state_rv drbd_set_role(struct drbd_conf *mdev,
 
 
 
 
 
 
 
 
 
 
1526					enum drbd_role new_role,
1527					int force);
1528extern enum drbd_disk_state drbd_try_outdate_peer(struct drbd_conf *mdev);
1529extern void drbd_try_outdate_peer_async(struct drbd_conf *mdev);
1530extern int drbd_khelper(struct drbd_conf *mdev, char *cmd);
 
1531
1532/* drbd_worker.c */
 
 
 
 
1533extern int drbd_worker(struct drbd_thread *thi);
1534extern int drbd_alter_sa(struct drbd_conf *mdev, int na);
1535extern void drbd_start_resync(struct drbd_conf *mdev, enum drbd_conns side);
1536extern void resume_next_sg(struct drbd_conf *mdev);
1537extern void suspend_other_sg(struct drbd_conf *mdev);
1538extern int drbd_resync_finished(struct drbd_conf *mdev);
 
1539/* maybe rather drbd_main.c ? */
1540extern int drbd_md_sync_page_io(struct drbd_conf *mdev,
1541		struct drbd_backing_dev *bdev, sector_t sector, int rw);
1542extern void drbd_ov_oos_found(struct drbd_conf*, sector_t, int);
1543extern void drbd_rs_controller_reset(struct drbd_conf *mdev);
1544
1545static inline void ov_oos_print(struct drbd_conf *mdev)
1546{
1547	if (mdev->ov_last_oos_size) {
1548		dev_err(DEV, "Out of sync: start=%llu, size=%lu (sectors)\n",
1549		     (unsigned long long)mdev->ov_last_oos_start,
1550		     (unsigned long)mdev->ov_last_oos_size);
 
 
 
 
1551	}
1552	mdev->ov_last_oos_size=0;
1553}
1554
1555
void drbd_csum_bio(struct drbd_conf *, struct crypto_hash *,
		   struct bio *, void *);
void drbd_csum_ee(struct drbd_conf *, struct crypto_hash *,
		  struct drbd_epoch_entry *, void *);

/* worker callbacks */
int w_req_cancel_conflict(struct drbd_conf *, struct drbd_work *, int);
int w_read_retry_remote(struct drbd_conf *, struct drbd_work *, int);
int w_e_end_data_req(struct drbd_conf *, struct drbd_work *, int);
int w_e_end_rsdata_req(struct drbd_conf *, struct drbd_work *, int);
int w_e_end_csum_rs_req(struct drbd_conf *, struct drbd_work *, int);
int w_e_end_ov_reply(struct drbd_conf *, struct drbd_work *, int);
int w_e_end_ov_req(struct drbd_conf *, struct drbd_work *, int);
int w_ov_finished(struct drbd_conf *, struct drbd_work *, int);
int w_resync_timer(struct drbd_conf *, struct drbd_work *, int);
int w_resume_next_sg(struct drbd_conf *, struct drbd_work *, int);
int w_send_write_hint(struct drbd_conf *, struct drbd_work *, int);
int w_send_dblock(struct drbd_conf *, struct drbd_work *, int);
int w_send_barrier(struct drbd_conf *, struct drbd_work *, int);
int w_send_read_req(struct drbd_conf *, struct drbd_work *, int);
int w_prev_work_done(struct drbd_conf *, struct drbd_work *, int);
int w_e_reissue(struct drbd_conf *, struct drbd_work *, int);
int w_restart_disk_io(struct drbd_conf *, struct drbd_work *, int);
int w_send_oos(struct drbd_conf *, struct drbd_work *, int);
int w_start_resync(struct drbd_conf *, struct drbd_work *, int);

/* timer functions */
void resync_timer_fn(unsigned long data);
void start_resync_timer_fn(unsigned long data);
1581
1582/* drbd_receiver.c */
1583extern int drbd_rs_should_slow_down(struct drbd_conf *mdev, sector_t sector);
1584extern int drbd_submit_ee(struct drbd_conf *mdev, struct drbd_epoch_entry *e,
1585		const unsigned rw, const int fault_type);
1586extern int drbd_release_ee(struct drbd_conf *mdev, struct list_head *list);
1587extern struct drbd_epoch_entry *drbd_alloc_ee(struct drbd_conf *mdev,
1588					    u64 id,
1589					    sector_t sector,
1590					    unsigned int data_size,
1591					    gfp_t gfp_mask) __must_hold(local);
1592extern void drbd_free_some_ee(struct drbd_conf *mdev, struct drbd_epoch_entry *e,
1593		int is_net);
1594#define drbd_free_ee(m,e)	drbd_free_some_ee(m, e, 0)
1595#define drbd_free_net_ee(m,e)	drbd_free_some_ee(m, e, 1)
1596extern void drbd_wait_ee_list_empty(struct drbd_conf *mdev,
1597		struct list_head *head);
1598extern void _drbd_wait_ee_list_empty(struct drbd_conf *mdev,
1599		struct list_head *head);
1600extern void drbd_set_recv_tcq(struct drbd_conf *mdev, int tcq_enabled);
1601extern void _drbd_clear_done_ee(struct drbd_conf *mdev, struct list_head *to_be_freed);
1602extern void drbd_flush_workqueue(struct drbd_conf *mdev);
1603extern void drbd_free_tl_hash(struct drbd_conf *mdev);
1604
1605/* yes, there is kernel_setsockopt, but only since 2.6.18. we don't need to
1606 * mess with get_fs/set_fs, we know we are KERNEL_DS always. */
1607static inline int drbd_setsockopt(struct socket *sock, int level, int optname,
1608			char __user *optval, int optlen)
1609{
1610	int err;
1611	if (level == SOL_SOCKET)
1612		err = sock_setsockopt(sock, level, optname, optval, optlen);
1613	else
1614		err = sock->ops->setsockopt(sock, level, optname, optval,
1615					    optlen);
1616	return err;
1617}
1618
1619static inline void drbd_tcp_cork(struct socket *sock)
1620{
1621	int __user val = 1;
1622	(void) drbd_setsockopt(sock, SOL_TCP, TCP_CORK,
1623			(char __user *)&val, sizeof(val));
1624}
1625
1626static inline void drbd_tcp_uncork(struct socket *sock)
1627{
1628	int __user val = 0;
1629	(void) drbd_setsockopt(sock, SOL_TCP, TCP_CORK,
1630			(char __user *)&val, sizeof(val));
1631}
1632
1633static inline void drbd_tcp_nodelay(struct socket *sock)
1634{
1635	int __user val = 1;
1636	(void) drbd_setsockopt(sock, SOL_TCP, TCP_NODELAY,
1637			(char __user *)&val, sizeof(val));
1638}
1639
1640static inline void drbd_tcp_quickack(struct socket *sock)
1641{
1642	int __user val = 2;
1643	(void) drbd_setsockopt(sock, SOL_TCP, TCP_QUICKACK,
1644			(char __user *)&val, sizeof(val));
1645}
1646
/* Takes one of the enum write_ordering_e methods for @mdev. */
void drbd_bump_write_ordering(struct drbd_conf *mdev,
			      enum write_ordering_e wo);
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1648
/* drbd_proc.c */
extern struct proc_dir_entry *drbd_proc;
extern const struct file_operations drbd_proc_fops;

/* string names for connection states and roles */
const char *drbd_conn_str(enum drbd_conns s);
const char *drbd_role_str(enum drbd_role s);
1654
1655/* drbd_actlog.c */
1656extern void drbd_al_begin_io(struct drbd_conf *mdev, sector_t sector);
1657extern void drbd_al_complete_io(struct drbd_conf *mdev, sector_t sector);
1658extern void drbd_rs_complete_io(struct drbd_conf *mdev, sector_t sector);
1659extern int drbd_rs_begin_io(struct drbd_conf *mdev, sector_t sector);
1660extern int drbd_try_rs_begin_io(struct drbd_conf *mdev, sector_t sector);
1661extern void drbd_rs_cancel_all(struct drbd_conf *mdev);
1662extern int drbd_rs_del_all(struct drbd_conf *mdev);
1663extern void drbd_rs_failed_io(struct drbd_conf *mdev,
 
 
 
 
1664		sector_t sector, int size);
1665extern int drbd_al_read_log(struct drbd_conf *mdev, struct drbd_backing_dev *);
1666extern void drbd_advance_rs_marks(struct drbd_conf *mdev, unsigned long still_to_go);
1667extern void __drbd_set_in_sync(struct drbd_conf *mdev, sector_t sector,
1668		int size, const char *file, const unsigned int line);
1669#define drbd_set_in_sync(mdev, sector, size) \
1670	__drbd_set_in_sync(mdev, sector, size, __FILE__, __LINE__)
1671extern int __drbd_set_out_of_sync(struct drbd_conf *mdev, sector_t sector,
1672		int size, const char *file, const unsigned int line);
1673#define drbd_set_out_of_sync(mdev, sector, size) \
1674	__drbd_set_out_of_sync(mdev, sector, size, __FILE__, __LINE__)
1675extern void drbd_al_apply_to_bm(struct drbd_conf *mdev);
1676extern void drbd_al_shrink(struct drbd_conf *mdev);
1677
 
 
 
 
 
 
 
 
 
 
 
1678
1679/* drbd_nl.c */
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1680
1681void drbd_nl_cleanup(void);
1682int __init drbd_nl_init(void);
1683void drbd_bcast_state(struct drbd_conf *mdev, union drbd_state);
1684void drbd_bcast_sync_progress(struct drbd_conf *mdev);
1685void drbd_bcast_ee(struct drbd_conf *mdev,
1686		const char *reason, const int dgs,
1687		const char* seen_hash, const char* calc_hash,
1688		const struct drbd_epoch_entry* e);
1689
1690
1691/**
1692 * DOC: DRBD State macros
1693 *
1694 * These macros are used to express state changes in easily readable form.
1695 *
1696 * The NS macros expand to a mask and a value, that can be bit ored onto the
1697 * current state as soon as the spinlock (req_lock) was taken.
1698 *
1699 * The _NS macros are used for state functions that get called with the
1700 * spinlock. These macros expand directly to the new state value.
1701 *
1702 * Besides the basic forms NS() and _NS() additional _?NS[23] are defined
1703 * to express state changes that affect more than one aspect of the state.
1704 *
1705 * E.g. NS2(conn, C_CONNECTED, peer, R_SECONDARY)
1706 * Means that the network connection was established and that the peer
1707 * is in secondary role.
1708 */
/* Per-field masks; the NS macros below select them by pasting T##_MASK. */
#define role_MASK R_MASK
#define peer_MASK R_MASK
#define disk_MASK D_MASK
#define pdsk_MASK D_MASK
#define conn_MASK C_MASK
#define susp_MASK 1
#define user_isp_MASK 1
#define aftr_isp_MASK 1
#define susp_nod_MASK 1
#define susp_fen_MASK 1

/*
 * NS(), NS2(), NS3(): each expands to TWO comma separated expressions of
 * type union drbd_state -- first the mask, then the value -- so a single
 * macro invocation fills two consecutive function arguments.
 *
 * T, T1.. name a bit field of union drbd_state; S, S1.. are the new values.
 *
 * The temporaries carry an __ns_ prefix so that an S expression mentioning
 * a caller variable named "mask" or "val" is not captured by the
 * statement expression's own local.
 */
#define NS(T, S) \
	({ union drbd_state __ns_mask; __ns_mask.i = 0; \
	  __ns_mask.T = T##_MASK; __ns_mask; }), \
	({ union drbd_state __ns_val; __ns_val.i = 0; \
	  __ns_val.T = (S); __ns_val; })
#define NS2(T1, S1, T2, S2) \
	({ union drbd_state __ns_mask; __ns_mask.i = 0; \
	  __ns_mask.T1 = T1##_MASK; __ns_mask.T2 = T2##_MASK; __ns_mask; }), \
	({ union drbd_state __ns_val; __ns_val.i = 0; \
	  __ns_val.T1 = (S1); __ns_val.T2 = (S2); __ns_val; })
#define NS3(T1, S1, T2, S2, T3, S3) \
	({ union drbd_state __ns_mask; __ns_mask.i = 0; \
	  __ns_mask.T1 = T1##_MASK; __ns_mask.T2 = T2##_MASK; \
	  __ns_mask.T3 = T3##_MASK; __ns_mask; }), \
	({ union drbd_state __ns_val; __ns_val.i = 0; \
	  __ns_val.T1 = (S1); __ns_val.T2 = (S2); \
	  __ns_val.T3 = (S3); __ns_val; })
1733
/*
 * _NS(), _NS2(), _NS3(): expand to TWO comma separated arguments: the
 * device D itself, followed by a union drbd_state that is a copy of
 * D->state with the named field(s) T replaced by the value(s) S.
 *
 * D is parenthesized where it is dereferenced so that any pointer
 * expression can be passed safely.  Note that D is evaluated twice.
 */
#define _NS(D, T, S) \
	D, ({ union drbd_state __ns; __ns.i = (D)->state.i; \
	__ns.T = (S); __ns; })
#define _NS2(D, T1, S1, T2, S2) \
	D, ({ union drbd_state __ns; __ns.i = (D)->state.i; \
	__ns.T1 = (S1); __ns.T2 = (S2); __ns; })
#define _NS3(D, T1, S1, T2, S2, T3, S3) \
	D, ({ union drbd_state __ns; __ns.i = (D)->state.i; \
	__ns.T1 = (S1); __ns.T2 = (S2); __ns.T3 = (S3); __ns; })
1742
1743/*
1744 * inline helper functions
1745 *************************/
1746
/*
 * Pages are chained through their page_private() value; see also
 * page_chain_add and friends in drbd_receiver.c.
 * Returns the successor of @page as stored in its private field.
 */
static inline struct page *page_chain_next(struct page *page)
{
	unsigned long link = page_private(page);

	return (struct page *)link;
}
/* Walk the chain, advancing @page itself; prefetches the next element. */
#define page_chain_for_each(page) \
	for (; page && ({ prefetch(page_chain_next(page)); 1; }); \
			page = page_chain_next(page))
/* As above, but the successor is saved in @n before the body runs. */
#define page_chain_for_each_safe(page, n) \
	for (; page && ({ n = page_chain_next(page); 1; }); page = n)
1757
1758static inline int drbd_bio_has_active_page(struct bio *bio)
1759{
1760	struct bio_vec *bvec;
1761	int i;
1762
1763	__bio_for_each_segment(bvec, bio, i, 0) {
1764		if (page_count(bvec->bv_page) > 1)
1765			return 1;
1766	}
1767
1768	return 0;
1769}
1770
1771static inline int drbd_ee_has_active_page(struct drbd_epoch_entry *e)
1772{
1773	struct page *page = e->pages;
1774	page_chain_for_each(page) {
1775		if (page_count(page) > 1)
1776			return 1;
1777	}
1778	return 0;
1779}
1780
1781
1782static inline void drbd_state_lock(struct drbd_conf *mdev)
1783{
1784	wait_event(mdev->misc_wait,
1785		   !test_and_set_bit(CLUSTER_ST_CHANGE, &mdev->flags));
1786}
1787
1788static inline void drbd_state_unlock(struct drbd_conf *mdev)
1789{
1790	clear_bit(CLUSTER_ST_CHANGE, &mdev->flags);
1791	wake_up(&mdev->misc_wait);
1792}
1793
1794static inline enum drbd_state_rv
1795_drbd_set_state(struct drbd_conf *mdev, union drbd_state ns,
1796		enum chg_state_flags flags, struct completion *done)
1797{
1798	enum drbd_state_rv rv;
1799
1800	read_lock(&global_state_lock);
1801	rv = __drbd_set_state(mdev, ns, flags, done);
1802	read_unlock(&global_state_lock);
 
1803
1804	return rv;
1805}
1806
1807/**
1808 * drbd_request_state() - Reqest a state change
1809 * @mdev:	DRBD device.
1810 * @mask:	mask of state bits to change.
1811 * @val:	value of new state bits.
1812 *
1813 * This is the most graceful way of requesting a state change. It is verbose
1814 * quite verbose in case the state change is not possible, and all those
1815 * state changes are globally serialized.
1816 */
1817static inline int drbd_request_state(struct drbd_conf *mdev,
1818				     union drbd_state mask,
1819				     union drbd_state val)
1820{
1821	return _drbd_request_state(mdev, mask, val, CS_VERBOSE + CS_ORDERED);
1822}
1823
1824#define __drbd_chk_io_error(m,f) __drbd_chk_io_error_(m,f, __func__)
1825static inline void __drbd_chk_io_error_(struct drbd_conf *mdev, int forcedetach, const char *where)
1826{
1827	switch (mdev->ldev->dc.on_io_error) {
1828	case EP_PASS_ON:
1829		if (!forcedetach) {
 
 
 
 
 
 
 
1830			if (__ratelimit(&drbd_ratelimit_state))
1831				dev_err(DEV, "Local IO failed in %s.\n", where);
1832			if (mdev->state.disk > D_INCONSISTENT)
1833				_drbd_set_state(_NS(mdev, disk, D_INCONSISTENT), CS_HARD, NULL);
1834			break;
1835		}
1836		/* NOTE fall through to detach case if forcedetach set */
1837	case EP_DETACH:
1838	case EP_CALL_HELPER:
1839		set_bit(WAS_IO_ERROR, &mdev->flags);
1840		if (mdev->state.disk > D_FAILED) {
1841			_drbd_set_state(_NS(mdev, disk, D_FAILED), CS_HARD, NULL);
1842			dev_err(DEV,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1843				"Local IO failed in %s. Detaching...\n", where);
1844		}
1845		break;
1846	}
1847}
1848
1849/**
1850 * drbd_chk_io_error: Handle the on_io_error setting, should be called from all io completion handlers
1851 * @mdev:	 DRBD device.
1852 * @error:	 Error code passed to the IO completion callback
1853 * @forcedetach: Force detach. I.e. the error happened while accessing the meta data
1854 *
1855 * See also drbd_main.c:after_state_ch() if (os.disk > D_FAILED && ns.disk == D_FAILED)
1856 */
1857#define drbd_chk_io_error(m,e,f) drbd_chk_io_error_(m,e,f, __func__)
1858static inline void drbd_chk_io_error_(struct drbd_conf *mdev,
1859	int error, int forcedetach, const char *where)
1860{
1861	if (error) {
1862		unsigned long flags;
1863		spin_lock_irqsave(&mdev->req_lock, flags);
1864		__drbd_chk_io_error_(mdev, forcedetach, where);
1865		spin_unlock_irqrestore(&mdev->req_lock, flags);
1866	}
1867}
1868
1869
1870/**
1871 * drbd_md_first_sector() - Returns the first sector number of the meta data area
1872 * @bdev:	Meta data block device.
1873 *
1874 * BTW, for internal meta data, this happens to be the maximum capacity
1875 * we could agree upon with our peer node.
1876 */
1877static inline sector_t drbd_md_first_sector(struct drbd_backing_dev *bdev)
1878{
1879	switch (bdev->dc.meta_dev_idx) {
1880	case DRBD_MD_INDEX_INTERNAL:
1881	case DRBD_MD_INDEX_FLEX_INT:
1882		return bdev->md.md_offset + bdev->md.bm_offset;
1883	case DRBD_MD_INDEX_FLEX_EXT:
1884	default:
1885		return bdev->md.md_offset;
1886	}
1887}
1888
1889/**
1890 * drbd_md_last_sector() - Return the last sector number of the meta data area
1891 * @bdev:	Meta data block device.
1892 */
1893static inline sector_t drbd_md_last_sector(struct drbd_backing_dev *bdev)
1894{
1895	switch (bdev->dc.meta_dev_idx) {
1896	case DRBD_MD_INDEX_INTERNAL:
1897	case DRBD_MD_INDEX_FLEX_INT:
1898		return bdev->md.md_offset + MD_AL_OFFSET - 1;
1899	case DRBD_MD_INDEX_FLEX_EXT:
1900	default:
1901		return bdev->md.md_offset + bdev->md.md_size_sect;
1902	}
1903}
1904
1905/* Returns the number of 512 byte sectors of the device */
1906static inline sector_t drbd_get_capacity(struct block_device *bdev)
1907{
1908	/* return bdev ? get_capacity(bdev->bd_disk) : 0; */
1909	return bdev ? i_size_read(bdev->bd_inode) >> 9 : 0;
1910}
1911
1912/**
1913 * drbd_get_max_capacity() - Returns the capacity we announce to out peer
1914 * @bdev:	Meta data block device.
1915 *
1916 * returns the capacity we announce to out peer.  we clip ourselves at the
1917 * various MAX_SECTORS, because if we don't, current implementation will
1918 * oops sooner or later
1919 */
1920static inline sector_t drbd_get_max_capacity(struct drbd_backing_dev *bdev)
1921{
1922	sector_t s;
1923	switch (bdev->dc.meta_dev_idx) {
 
1924	case DRBD_MD_INDEX_INTERNAL:
1925	case DRBD_MD_INDEX_FLEX_INT:
1926		s = drbd_get_capacity(bdev->backing_bdev)
1927			? min_t(sector_t, DRBD_MAX_SECTORS_FLEX,
1928					drbd_md_first_sector(bdev))
1929			: 0;
1930		break;
1931	case DRBD_MD_INDEX_FLEX_EXT:
1932		s = min_t(sector_t, DRBD_MAX_SECTORS_FLEX,
1933				drbd_get_capacity(bdev->backing_bdev));
1934		/* clip at maximum size the meta device can support */
1935		s = min_t(sector_t, s,
1936			BM_EXT_TO_SECT(bdev->md.md_size_sect
1937				     - bdev->md.bm_offset));
1938		break;
1939	default:
1940		s = min_t(sector_t, DRBD_MAX_SECTORS,
1941				drbd_get_capacity(bdev->backing_bdev));
1942	}
1943	return s;
1944}
1945
1946/**
1947 * drbd_md_ss__() - Return the sector number of our meta data super block
1948 * @mdev:	DRBD device.
1949 * @bdev:	Meta data block device.
1950 */
1951static inline sector_t drbd_md_ss__(struct drbd_conf *mdev,
1952				    struct drbd_backing_dev *bdev)
1953{
1954	switch (bdev->dc.meta_dev_idx) {
1955	default: /* external, some index */
1956		return MD_RESERVED_SECT * bdev->dc.meta_dev_idx;
1957	case DRBD_MD_INDEX_INTERNAL:
1958		/* with drbd08, internal meta data is always "flexible" */
1959	case DRBD_MD_INDEX_FLEX_INT:
1960		/* sizeof(struct md_on_disk_07) == 4k
1961		 * position: last 4k aligned block of 4k size */
1962		if (!bdev->backing_bdev) {
1963			if (__ratelimit(&drbd_ratelimit_state)) {
1964				dev_err(DEV, "bdev->backing_bdev==NULL\n");
1965				dump_stack();
1966			}
1967			return 0;
1968		}
1969		return (drbd_get_capacity(bdev->backing_bdev) & ~7ULL)
1970			- MD_AL_OFFSET;
1971	case DRBD_MD_INDEX_FLEX_EXT:
1972		return 0;
1973	}
 
 
 
 
 
 
 
 
1974}
1975
1976static inline void
1977drbd_queue_work_front(struct drbd_work_queue *q, struct drbd_work *w)
1978{
1979	unsigned long flags;
1980	spin_lock_irqsave(&q->q_lock, flags);
1981	list_add(&w->list, &q->q);
1982	up(&q->s); /* within the spinlock,
1983		      see comment near end of drbd_worker() */
1984	spin_unlock_irqrestore(&q->q_lock, flags);
 
1985}
1986
1987static inline void
1988drbd_queue_work(struct drbd_work_queue *q, struct drbd_work *w)
1989{
1990	unsigned long flags;
1991	spin_lock_irqsave(&q->q_lock, flags);
1992	list_add_tail(&w->list, &q->q);
1993	up(&q->s); /* within the spinlock,
1994		      see comment near end of drbd_worker() */
1995	spin_unlock_irqrestore(&q->q_lock, flags);
 
1996}
1997
1998static inline void wake_asender(struct drbd_conf *mdev)
1999{
2000	if (test_bit(SIGNAL_ASENDER, &mdev->flags))
2001		force_sig(DRBD_SIG, mdev->asender.task);
2002}
2003
2004static inline void request_ping(struct drbd_conf *mdev)
2005{
2006	set_bit(SEND_PING, &mdev->flags);
2007	wake_asender(mdev);
2008}
2009
2010static inline int drbd_send_short_cmd(struct drbd_conf *mdev,
2011	enum drbd_packets cmd)
2012{
2013	struct p_header80 h;
2014	return drbd_send_cmd(mdev, USE_DATA_SOCKET, cmd, &h, sizeof(h));
 
 
 
 
 
2015}
2016
2017static inline int drbd_send_ping(struct drbd_conf *mdev)
2018{
2019	struct p_header80 h;
2020	return drbd_send_cmd(mdev, USE_META_SOCKET, P_PING, &h, sizeof(h));
2021}
2022
2023static inline int drbd_send_ping_ack(struct drbd_conf *mdev)
2024{
2025	struct p_header80 h;
2026	return drbd_send_cmd(mdev, USE_META_SOCKET, P_PING_ACK, &h, sizeof(h));
2027}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2028
2029static inline void drbd_thread_stop(struct drbd_thread *thi)
2030{
2031	_drbd_thread_stop(thi, false, true);
2032}
2033
2034static inline void drbd_thread_stop_nowait(struct drbd_thread *thi)
2035{
2036	_drbd_thread_stop(thi, false, false);
2037}
2038
2039static inline void drbd_thread_restart_nowait(struct drbd_thread *thi)
2040{
2041	_drbd_thread_stop(thi, true, false);
2042}
2043
2044/* counts how many answer packets packets we expect from our peer,
2045 * for either explicit application requests,
2046 * or implicit barrier packets as necessary.
2047 * increased:
2048 *  w_send_barrier
2049 *  _req_mod(req, queue_for_net_write or queue_for_net_read);
2050 *    it is much easier and equally valid to count what we queue for the
2051 *    worker, even before it actually was queued or send.
2052 *    (drbd_make_request_common; recovery path on read io-error)
2053 * decreased:
2054 *  got_BarrierAck (respective tl_clear, tl_clear_barrier)
2055 *  _req_mod(req, data_received)
2056 *     [from receive_DataReply]
2057 *  _req_mod(req, write_acked_by_peer or recv_acked_by_peer or neg_acked)
2058 *     [from got_BlockAck (P_WRITE_ACK, P_RECV_ACK)]
2059 *     for some reason it is NOT decreased in got_NegAck,
2060 *     but in the resulting cleanup code from report_params.
2061 *     we should try to remember the reason for that...
2062 *  _req_mod(req, send_failed or send_canceled)
2063 *  _req_mod(req, connection_lost_while_pending)
2064 *     [from tl_clear_barrier]
2065 */
2066static inline void inc_ap_pending(struct drbd_conf *mdev)
2067{
2068	atomic_inc(&mdev->ap_pending_cnt);
2069}
2070
/*
 * Log an error if the atomic counter mdev->which has dropped below zero.
 *
 * NOTE: this macro refers to a variable literally named "mdev" in the
 * scope where it is expanded; it is not a macro parameter.
 *
 * Wrapped in do { } while (0) so it behaves as a single statement and
 * cannot swallow an "else" that follows its use.
 */
#define ERR_IF_CNT_IS_NEGATIVE(which)				\
	do {							\
		if (atomic_read(&mdev->which) < 0)		\
			dev_err(DEV, "in %s:%d: " #which " = %d < 0 !\n", \
			    __func__ , __LINE__ ,		\
			    atomic_read(&mdev->which));		\
	} while (0)
2076
/*
 * Decrement ap_pending_cnt; when it reaches zero wake up misc_wait.
 * Complains via ERR_IF_CNT_IS_NEGATIVE() if the counter went negative.
 * The argument is type checked and parenthesized wherever it is
 * dereferenced; it is evaluated more than once.
 */
#define dec_ap_pending(mdev)	do {				\
	typecheck(struct drbd_conf *, mdev);			\
	if (atomic_dec_and_test(&(mdev)->ap_pending_cnt))	\
		wake_up(&(mdev)->misc_wait);			\
	ERR_IF_CNT_IS_NEGATIVE(ap_pending_cnt); } while (0)
 
 
2082
2083/* counts how many resync-related answers we still expect from the peer
2084 *		     increase			decrease
2085 * C_SYNC_TARGET sends P_RS_DATA_REQUEST (and expects P_RS_DATA_REPLY)
2086 * C_SYNC_SOURCE sends P_RS_DATA_REPLY   (and expects P_WRITE_ACK with ID_SYNCER)
2087 *					   (or P_NEG_ACK with ID_SYNCER)
2088 */
2089static inline void inc_rs_pending(struct drbd_conf *mdev)
2090{
2091	atomic_inc(&mdev->rs_pending_cnt);
2092}
2093
/*
 * Decrement rs_pending_cnt and complain via ERR_IF_CNT_IS_NEGATIVE() if it
 * went negative.  The argument is type checked and parenthesized where it
 * is dereferenced.
 */
#define dec_rs_pending(mdev)	do {				\
	typecheck(struct drbd_conf *, mdev);			\
	atomic_dec(&(mdev)->rs_pending_cnt);			\
	ERR_IF_CNT_IS_NEGATIVE(rs_pending_cnt); } while (0)
 
 
2098
2099/* counts how many answers we still need to send to the peer.
2100 * increased on
2101 *  receive_Data	unless protocol A;
2102 *			we need to send a P_RECV_ACK (proto B)
2103 *			or P_WRITE_ACK (proto C)
2104 *  receive_RSDataReply (recv_resync_read) we need to send a P_WRITE_ACK
2105 *  receive_DataRequest (receive_RSDataRequest) we need to send back P_DATA
2106 *  receive_Barrier_*	we need to send a P_BARRIER_ACK
2107 */
2108static inline void inc_unacked(struct drbd_conf *mdev)
2109{
2110	atomic_inc(&mdev->unacked_cnt);
2111}
2112
/*
 * Decrement unacked_cnt and complain via ERR_IF_CNT_IS_NEGATIVE() if it
 * went negative.  The argument is type checked and parenthesized where it
 * is dereferenced.
 */
#define dec_unacked(mdev)	do {				\
	typecheck(struct drbd_conf *, mdev);			\
	atomic_dec(&(mdev)->unacked_cnt);			\
	ERR_IF_CNT_IS_NEGATIVE(unacked_cnt); } while (0)
2117
/*
 * Subtract n from unacked_cnt and complain via ERR_IF_CNT_IS_NEGATIVE() if
 * the counter went negative.  Both arguments are parenthesized in the
 * expansion.
 */
#define sub_unacked(mdev, n)	do {				\
	typecheck(struct drbd_conf *, mdev);			\
	atomic_sub((n), &(mdev)->unacked_cnt);			\
	ERR_IF_CNT_IS_NEGATIVE(unacked_cnt); } while (0)
2122
 
 
 
 
 
 
2123
2124static inline void put_net_conf(struct drbd_conf *mdev)
2125{
2126	if (atomic_dec_and_test(&mdev->net_cnt))
2127		wake_up(&mdev->net_cnt_wait);
2128}
2129
2130/**
2131 * get_net_conf() - Increase ref count on mdev->net_conf; Returns 0 if nothing there
2132 * @mdev:	DRBD device.
2133 *
2134 * You have to call put_net_conf() when finished working with mdev->net_conf.
2135 */
2136static inline int get_net_conf(struct drbd_conf *mdev)
2137{
2138	int have_net_conf;
 
 
2139
2140	atomic_inc(&mdev->net_cnt);
2141	have_net_conf = mdev->state.conn >= C_UNCONNECTED;
2142	if (!have_net_conf)
2143		put_net_conf(mdev);
2144	return have_net_conf;
2145}
2146
/**
 * get_ldev() - Increase the ref count on mdev->ldev. Returns 0 if there is no ldev
 * @M:		DRBD device.
 *
 * get_ldev() requires at least D_INCONSISTENT; get_ldev_if_state() lets
 * the caller choose the minimum disk state @MINS.  Both wrap
 * _get_ldev_if_state() in __cond_lock(local, ...).
 *
 * You have to call put_ldev() when finished working with mdev->ldev.
 */
#define get_ldev(M) \
	__cond_lock(local, _get_ldev_if_state(M, D_INCONSISTENT))
#define get_ldev_if_state(M, MINS) \
	__cond_lock(local, _get_ldev_if_state(M, MINS))
2155
2156static inline void put_ldev(struct drbd_conf *mdev)
2157{
2158	int i = atomic_dec_return(&mdev->local_cnt);
 
 
 
 
 
 
 
2159
2160	/* This may be called from some endio handler,
2161	 * so we must not sleep here. */
2162
2163	__release(local);
2164	D_ASSERT(i >= 0);
2165	if (i == 0) {
2166		if (mdev->state.disk == D_DISKLESS)
2167			/* even internal references gone, safe to destroy */
2168			drbd_ldev_destroy(mdev);
2169		if (mdev->state.disk == D_FAILED)
2170			/* all application IO references gone. */
2171			drbd_go_diskless(mdev);
2172		wake_up(&mdev->misc_wait);
 
2173	}
2174}
2175
2176#ifndef __CHECKER__
2177static inline int _get_ldev_if_state(struct drbd_conf *mdev, enum drbd_disk_state mins)
2178{
2179	int io_allowed;
2180
2181	/* never get a reference while D_DISKLESS */
2182	if (mdev->state.disk == D_DISKLESS)
2183		return 0;
2184
2185	atomic_inc(&mdev->local_cnt);
2186	io_allowed = (mdev->state.disk >= mins);
2187	if (!io_allowed)
2188		put_ldev(mdev);
2189	return io_allowed;
2190}
2191#else
2192extern int _get_ldev_if_state(struct drbd_conf *mdev, enum drbd_disk_state mins);
2193#endif
2194
2195/* you must have an "get_ldev" reference */
2196static inline void drbd_get_syncer_progress(struct drbd_conf *mdev,
2197		unsigned long *bits_left, unsigned int *per_mil_done)
2198{
2199	/* this is to break it at compile time when we change that, in case we
2200	 * want to support more than (1<<32) bits on a 32bit arch. */
2201	typecheck(unsigned long, mdev->rs_total);
2202
2203	/* note: both rs_total and rs_left are in bits, i.e. in
2204	 * units of BM_BLOCK_SIZE.
2205	 * for the percentage, we don't care. */
2206
2207	if (mdev->state.conn == C_VERIFY_S || mdev->state.conn == C_VERIFY_T)
2208		*bits_left = mdev->ov_left;
2209	else
2210		*bits_left = drbd_bm_total_weight(mdev) - mdev->rs_failed;
2211	/* >> 10 to prevent overflow,
2212	 * +1 to prevent division by zero */
2213	if (*bits_left > mdev->rs_total) {
2214		/* doh. maybe a logic bug somewhere.
2215		 * may also be just a race condition
2216		 * between this and a disconnect during sync.
2217		 * for now, just prevent in-kernel buffer overflow.
2218		 */
2219		smp_rmb();
2220		dev_warn(DEV, "cs:%s rs_left=%lu > rs_total=%lu (rs_failed %lu)\n",
2221				drbd_conn_str(mdev->state.conn),
2222				*bits_left, mdev->rs_total, mdev->rs_failed);
2223		*per_mil_done = 0;
2224	} else {
2225		/* Make sure the division happens in long context.
2226		 * We allow up to one petabyte storage right now,
2227		 * at a granularity of 4k per bit that is 2**38 bits.
2228		 * After shift right and multiplication by 1000,
2229		 * this should still fit easily into a 32bit long,
2230		 * so we don't need a 64bit division on 32bit arch.
2231		 * Note: currently we don't support such large bitmaps on 32bit
2232		 * arch anyways, but no harm done to be prepared for it here.
2233		 */
2234		unsigned int shift = mdev->rs_total >= (1ULL << 32) ? 16 : 10;
2235		unsigned long left = *bits_left >> shift;
2236		unsigned long total = 1UL + (mdev->rs_total >> shift);
2237		unsigned long tmp = 1000UL - left * 1000UL/total;
2238		*per_mil_done = tmp;
2239	}
2240}
2241
2242
2243/* this throttles on-the-fly application requests
2244 * according to max_buffers settings;
2245 * maybe re-implement using semaphores? */
2246static inline int drbd_get_max_buffers(struct drbd_conf *mdev)
2247{
2248	int mxb = 1000000; /* arbitrary limit on open requests */
2249	if (get_net_conf(mdev)) {
2250		mxb = mdev->net_conf->max_buffers;
2251		put_net_conf(mdev);
2252	}
 
 
 
2253	return mxb;
2254}
2255
2256static inline int drbd_state_is_stable(struct drbd_conf *mdev)
2257{
2258	union drbd_state s = mdev->state;
2259
2260	/* DO NOT add a default clause, we want the compiler to warn us
2261	 * for any newly introduced state we may have forgotten to add here */
2262
2263	switch ((enum drbd_conns)s.conn) {
2264	/* new io only accepted when there is no connection, ... */
2265	case C_STANDALONE:
2266	case C_WF_CONNECTION:
2267	/* ... or there is a well established connection. */
2268	case C_CONNECTED:
2269	case C_SYNC_SOURCE:
2270	case C_SYNC_TARGET:
2271	case C_VERIFY_S:
2272	case C_VERIFY_T:
2273	case C_PAUSED_SYNC_S:
2274	case C_PAUSED_SYNC_T:
2275	case C_AHEAD:
2276	case C_BEHIND:
2277		/* transitional states, IO allowed */
2278	case C_DISCONNECTING:
2279	case C_UNCONNECTED:
2280	case C_TIMEOUT:
2281	case C_BROKEN_PIPE:
2282	case C_NETWORK_FAILURE:
2283	case C_PROTOCOL_ERROR:
2284	case C_TEAR_DOWN:
2285	case C_WF_REPORT_PARAMS:
2286	case C_STARTING_SYNC_S:
2287	case C_STARTING_SYNC_T:
2288		break;
2289
2290		/* Allow IO in BM exchange states with new protocols */
2291	case C_WF_BITMAP_S:
2292		if (mdev->agreed_pro_version < 96)
2293			return 0;
2294		break;
2295
2296		/* no new io accepted in these states */
2297	case C_WF_BITMAP_T:
2298	case C_WF_SYNC_UUID:
2299	case C_MASK:
2300		/* not "stable" */
2301		return 0;
2302	}
2303
2304	switch ((enum drbd_disk_state)s.disk) {
2305	case D_DISKLESS:
2306	case D_INCONSISTENT:
2307	case D_OUTDATED:
2308	case D_CONSISTENT:
2309	case D_UP_TO_DATE:
 
2310		/* disk state is stable as well. */
2311		break;
2312
2313	/* no new io accepted during tansitional states */
2314	case D_ATTACHING:
2315	case D_FAILED:
2316	case D_NEGOTIATING:
2317	case D_UNKNOWN:
2318	case D_MASK:
2319		/* not "stable" */
2320		return 0;
2321	}
2322
2323	return 1;
2324}
2325
2326static inline int is_susp(union drbd_state s)
2327{
2328	return s.susp || s.susp_nod || s.susp_fen;
 
 
2329}
2330
2331static inline bool may_inc_ap_bio(struct drbd_conf *mdev)
2332{
2333	int mxb = drbd_get_max_buffers(mdev);
2334
2335	if (is_susp(mdev->state))
2336		return false;
2337	if (test_bit(SUSPEND_IO, &mdev->flags))
2338		return false;
2339
2340	/* to avoid potential deadlock or bitmap corruption,
2341	 * in various places, we only allow new application io
2342	 * to start during "stable" states. */
2343
2344	/* no new io accepted when attaching or detaching the disk */
2345	if (!drbd_state_is_stable(mdev))
2346		return false;
2347
2348	/* since some older kernels don't have atomic_add_unless,
2349	 * and we are within the spinlock anyways, we have this workaround.  */
2350	if (atomic_read(&mdev->ap_bio_cnt) > mxb)
2351		return false;
2352	if (test_bit(BITMAP_IO, &mdev->flags))
2353		return false;
2354	return true;
2355}
2356
2357static inline bool inc_ap_bio_cond(struct drbd_conf *mdev, int count)
2358{
2359	bool rv = false;
2360
2361	spin_lock_irq(&mdev->req_lock);
2362	rv = may_inc_ap_bio(mdev);
2363	if (rv)
2364		atomic_add(count, &mdev->ap_bio_cnt);
2365	spin_unlock_irq(&mdev->req_lock);
2366
2367	return rv;
2368}
2369
2370static inline void inc_ap_bio(struct drbd_conf *mdev, int count)
2371{
2372	/* we wait here
2373	 *    as long as the device is suspended
2374	 *    until the bitmap is no longer on the fly during connection
2375	 *    handshake as long as we would exeed the max_buffer limit.
2376	 *
2377	 * to avoid races with the reconnect code,
2378	 * we need to atomic_inc within the spinlock. */
2379
2380	wait_event(mdev->misc_wait, inc_ap_bio_cond(mdev, count));
2381}
2382
2383static inline void dec_ap_bio(struct drbd_conf *mdev)
2384{
2385	int mxb = drbd_get_max_buffers(mdev);
2386	int ap_bio = atomic_dec_return(&mdev->ap_bio_cnt);
 
 
 
 
 
 
 
 
 
2387
2388	D_ASSERT(ap_bio >= 0);
2389	/* this currently does wake_up for every dec_ap_bio!
2390	 * maybe rather introduce some type of hysteresis?
2391	 * e.g. (ap_bio == mxb/2 || ap_bio == 0) ? */
2392	if (ap_bio < mxb)
2393		wake_up(&mdev->misc_wait);
2394	if (ap_bio == 0 && test_bit(BITMAP_IO, &mdev->flags)) {
2395		if (!test_and_set_bit(BITMAP_IO_QUEUED, &mdev->flags))
2396			drbd_queue_work(&mdev->data.work, &mdev->bm_io_work.w);
2397	}
2398}
2399
2400static inline int drbd_set_ed_uuid(struct drbd_conf *mdev, u64 val)
2401{
2402	int changed = mdev->ed_uuid != val;
2403	mdev->ed_uuid = val;
2404	return changed;
2405}
2406
/*
 * Compare two 32bit sequence numbers with wrap around.
 * Returns a value < 0, == 0 or > 0 if @a is before, equal to or after @b.
 */
static inline int seq_cmp(u32 a, u32 b)
{
	/* we assume wrap around at 32bit.
	 * for wrap around at 24bit (old atomic_t),
	 * we'd have to
	 *  a <<= 8; b <<= 8;
	 *
	 * Subtract in unsigned arithmetic, which wraps by definition, and
	 * only then reinterpret as signed: subtracting two s32 values may
	 * overflow, which is undefined behavior in ISO C.
	 */
	return (s32)(a - b);
}
#define seq_lt(a, b) (seq_cmp((a), (b)) < 0)
#define seq_gt(a, b) (seq_cmp((a), (b)) > 0)
#define seq_ge(a, b) (seq_cmp((a), (b)) >= 0)
#define seq_le(a, b) (seq_cmp((a), (b)) <= 0)
/* CAUTION: please no side effects in arguments! */
#define seq_max(a, b) ((u32)(seq_gt((a), (b)) ? (a) : (b)))
2422
2423static inline void update_peer_seq(struct drbd_conf *mdev, unsigned int new_seq)
2424{
2425	unsigned int m;
2426	spin_lock(&mdev->peer_seq_lock);
2427	m = seq_max(mdev->peer_seq, new_seq);
2428	mdev->peer_seq = m;
2429	spin_unlock(&mdev->peer_seq_lock);
2430	if (m == new_seq)
2431		wake_up(&mdev->seq_wait);
2432}
2433
2434static inline void drbd_update_congested(struct drbd_conf *mdev)
2435{
2436	struct sock *sk = mdev->data.socket->sk;
2437	if (sk->sk_wmem_queued > sk->sk_sndbuf * 4 / 5)
2438		set_bit(NET_CONGESTED, &mdev->flags);
2439}
2440
/*
 * Always returns QUEUE_ORDERED_NONE (defined as 0 here if the kernel
 * headers do not provide it); @mdev is not looked at.
 */
static inline int drbd_queue_order_type(struct drbd_conf *mdev)
{
	/* sorry, we currently have no working implementation
	 * of distributed TCQ stuff */
#ifndef QUEUE_ORDERED_NONE
#define QUEUE_ORDERED_NONE 0
#endif
	const int order = QUEUE_ORDERED_NONE;

	return order;
}
2450
2451static inline void drbd_md_flush(struct drbd_conf *mdev)
2452{
2453	int r;
2454
2455	if (test_bit(MD_NO_FUA, &mdev->flags))
2456		return;
2457
2458	r = blkdev_issue_flush(mdev->ldev->md_bdev, GFP_KERNEL, NULL);
2459	if (r) {
2460		set_bit(MD_NO_FUA, &mdev->flags);
2461		dev_err(DEV, "meta data flush failed with status %d, disabling md-flushes\n", r);
2462	}
2463}
2464
2465#endif

   1/*
   2  drbd_int.h
   3
   4  This file is part of DRBD by Philipp Reisner and Lars Ellenberg.
   5
   6  Copyright (C) 2001-2008, LINBIT Information Technologies GmbH.
   7  Copyright (C) 1999-2008, Philipp Reisner <philipp.reisner@linbit.com>.
   8  Copyright (C) 2002-2008, Lars Ellenberg <lars.ellenberg@linbit.com>.
   9
  10  drbd is free software; you can redistribute it and/or modify
  11  it under the terms of the GNU General Public License as published by
  12  the Free Software Foundation; either version 2, or (at your option)
  13  any later version.
  14
  15  drbd is distributed in the hope that it will be useful,
  16  but WITHOUT ANY WARRANTY; without even the implied warranty of
  17  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.	See the
  18  GNU General Public License for more details.
  19
  20  You should have received a copy of the GNU General Public License
  21  along with drbd; see the file COPYING.  If not, write to
  22  the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
  23
  24*/
  25
  26#ifndef _DRBD_INT_H
  27#define _DRBD_INT_H
  28
  29#include <crypto/hash.h>
  30#include <linux/compiler.h>
  31#include <linux/types.h>
 
  32#include <linux/list.h>
  33#include <linux/sched/signal.h>
  34#include <linux/bitops.h>
  35#include <linux/slab.h>
 
  36#include <linux/ratelimit.h>
  37#include <linux/tcp.h>
  38#include <linux/mutex.h>
  39#include <linux/major.h>
  40#include <linux/blkdev.h>
  41#include <linux/backing-dev.h>
  42#include <linux/genhd.h>
  43#include <linux/idr.h>
  44#include <linux/dynamic_debug.h>
  45#include <net/tcp.h>
  46#include <linux/lru_cache.h>
  47#include <linux/prefetch.h>
  48#include <linux/drbd_genl_api.h>
  49#include <linux/drbd.h>
  50#include "drbd_strings.h"
  51#include "drbd_state.h"
  52#include "drbd_protocol.h"
  53
  54#ifdef __CHECKER__
  55# define __protected_by(x)       __attribute__((require_context(x,1,999,"rdwr")))
  56# define __protected_read_by(x)  __attribute__((require_context(x,1,999,"read")))
  57# define __protected_write_by(x) __attribute__((require_context(x,1,999,"write")))
  58# define __must_hold(x)       __attribute__((context(x,1,1), require_context(x,1,999,"call")))
  59#else
  60# define __protected_by(x)
  61# define __protected_read_by(x)
  62# define __protected_write_by(x)
  63# define __must_hold(x)
  64#endif
  65
  66/* shared module parameters, defined in drbd_main.c */
 
 
 
 
 
 
 
  67#ifdef CONFIG_DRBD_FAULT_INJECTION
  68extern int drbd_enable_faults;
  69extern int drbd_fault_rate;
 
  70#endif
  71
  72extern unsigned int drbd_minor_count;
  73extern char drbd_usermode_helper[];
  74extern int drbd_proc_details;
  75
 
 
 
 
 
 
  76
  77/* This is used to stop/restart our threads.
  78 * Cannot use SIGTERM nor SIGKILL, since these
  79 * are sent out by init on runlevel changes
  80 * I choose SIGHUP for now.
  81 */
  82#define DRBD_SIGKILL SIGHUP
  83
 
 
 
 
 
  84#define ID_IN_SYNC      (4711ULL)
  85#define ID_OUT_OF_SYNC  (4712ULL)
 
  86#define ID_SYNCER (-1ULL)
 
 
 
 
 
  87
  88#define UUID_NEW_BM_OFFSET ((u64)0x0001000000000000ULL)
  89
  90struct drbd_device;
  91struct drbd_connection;
  92
  93#define __drbd_printk_device(level, device, fmt, args...) \
  94	dev_printk(level, disk_to_dev((device)->vdisk), fmt, ## args)
  95#define __drbd_printk_peer_device(level, peer_device, fmt, args...) \
  96	dev_printk(level, disk_to_dev((peer_device)->device->vdisk), fmt, ## args)
  97#define __drbd_printk_resource(level, resource, fmt, args...) \
  98	printk(level "drbd %s: " fmt, (resource)->name, ## args)
  99#define __drbd_printk_connection(level, connection, fmt, args...) \
 100	printk(level "drbd %s: " fmt, (connection)->resource->name, ## args)
 101
 /* Final fallback of drbd_printk(): chosen only when obj has none of the
  * supported pointer types.
  * NOTE(review): presumably left undefined on purpose so that such a use
  * fails at link time -- confirm. */
 102void drbd_printk_with_wrong_object_type(void);
 103
 /* Expands to TWO comma-separated __builtin_choose_expr() arguments:
  *  1) a compile-time test whether obj has type "type" or "const type",
  *  2) the call func(level, obj cast to const type, fmt, args) to use if so.
  * Only meaningful inside drbd_printk(). */
 104#define __drbd_printk_if_same_type(obj, type, func, level, fmt, args...) \
 105	(__builtin_types_compatible_p(typeof(obj), type) || \
 106	 __builtin_types_compatible_p(typeof(obj), const type)), \
 107	func(level, (const type)(obj), fmt, ## args)
 108
 /* Type-dispatching printk: picks the back end from the static type of obj.
  * Types are tried in this order: struct drbd_device *,
  * struct drbd_resource *, struct drbd_connection *,
  * struct drbd_peer_device *; anything else selects
  * drbd_printk_with_wrong_object_type(). */
 109#define drbd_printk(level, obj, fmt, args...) \
 110	__builtin_choose_expr( \
 111	  __drbd_printk_if_same_type(obj, struct drbd_device *, \
 112			     __drbd_printk_device, level, fmt, ## args), \
 113	  __builtin_choose_expr( \
 114	    __drbd_printk_if_same_type(obj, struct drbd_resource *, \
 115			       __drbd_printk_resource, level, fmt, ## args), \
 116	    __builtin_choose_expr( \
 117	      __drbd_printk_if_same_type(obj, struct drbd_connection *, \
 118				 __drbd_printk_connection, level, fmt, ## args), \
 119	      __builtin_choose_expr( \
 120		__drbd_printk_if_same_type(obj, struct drbd_peer_device *, \
 121				 __drbd_printk_peer_device, level, fmt, ## args), \
 122		drbd_printk_with_wrong_object_type()))))
 123
 /* Log-level convenience wrappers around drbd_printk(); obj may be any of
  * the object pointer types drbd_printk() accepts. */
 124#define drbd_dbg(obj, fmt, args...) \
 125	drbd_printk(KERN_DEBUG, obj, fmt, ## args)
 126#define drbd_alert(obj, fmt, args...) \
 127	drbd_printk(KERN_ALERT, obj, fmt, ## args)
 128#define drbd_err(obj, fmt, args...) \
 129	drbd_printk(KERN_ERR, obj, fmt, ## args)
 130#define drbd_warn(obj, fmt, args...) \
 131	drbd_printk(KERN_WARNING, obj, fmt, ## args)
 132#define drbd_info(obj, fmt, args...) \
 133	drbd_printk(KERN_INFO, obj, fmt, ## args)
 134#define drbd_emerg(obj, fmt, args...) \
 135	drbd_printk(KERN_EMERG, obj, fmt, ## args)
 136
 /*
  * dynamic_drbd_dbg - debug message for a device via dynamic_dev_dbg()
  * @device: expression yielding a pointer to an object with a vdisk member
  * @fmt:    printf-style format string, followed by its arguments
  *
  * The message is attributed to disk_to_dev(device->vdisk).  The device
  * argument is parenthesized so that expressions such as "p + 1" or
  * "cond ? a : b" expand correctly.
  */
#define dynamic_drbd_dbg(device, fmt, args...) \
	dynamic_dev_dbg(disk_to_dev((device)->vdisk), fmt, ## args)
 139
 /* Soft assertion: if exp evaluates to false, log an error on device that
  * contains the expression text, file name and line number.  Execution
  * continues afterwards; nothing is returned. */
 140#define D_ASSERT(device, exp)	do { \
 141	if (!(exp)) \
 142		drbd_err(device, "ASSERT( " #exp " ) in %s:%d\n", __FILE__, __LINE__); \
 143	} while (0)
 144
 145/**
 146 * expect  -  Make an assertion
 147 *
 148 * Unlike the assert macro, this macro returns a boolean result.
  *
  * If exp is false, an error naming the expression and the calling function
  * is logged with drbd_err().  The message is logged on an identifier called
  * "device", which is not a macro parameter: a suitable variable of that
  * name must be in scope wherever expect() is used.
 149 */
 150#define expect(exp) ({								\
 151		bool _bool = (exp);						\
 152		if (!_bool)							\
 153			drbd_err(device, "ASSERTION %s FAILED in %s\n",		\
 154			        #exp, __func__);				\
 155		_bool;								\
 156		})
 157
 158/* Defines to control fault insertion.
  * drbd_insert_fault() uses each value as a bit number when testing the
  * drbd_enable_faults mask.  DRBD_FAULT_MAX is one past the last type. */
 159enum {
 160	DRBD_FAULT_MD_WR = 0,	/* meta data write */
 161	DRBD_FAULT_MD_RD = 1,	/*           read  */
 162	DRBD_FAULT_RS_WR = 2,	/* resync          */
 163	DRBD_FAULT_RS_RD = 3,
 164	DRBD_FAULT_DT_WR = 4,	/* data            */
 165	DRBD_FAULT_DT_RD = 5,
 166	DRBD_FAULT_DT_RA = 6,	/* data read ahead */
 167	DRBD_FAULT_BM_ALLOC = 7,	/* bitmap allocation */
 168	DRBD_FAULT_AL_EE = 8,	/* alloc ee */
 169	DRBD_FAULT_RECEIVE = 9, /* Changes some bytes upon receiving a [rs]data block */
 170
 171	DRBD_FAULT_MAX,
 172};
 173
 /* Out-of-line part of the fault decision; defined in another file. */
extern unsigned int
_drbd_insert_fault(struct drbd_device *device, unsigned int type);

/**
 * drbd_insert_fault() - decide whether to inject a fault of the given type
 * @device: device, passed through to _drbd_insert_fault()
 * @type:   fault type (DRBD_FAULT_*), used as a bit number
 *
 * Without CONFIG_DRBD_FAULT_INJECTION this always returns 0.  Otherwise it
 * returns nonzero only if drbd_fault_rate is nonzero, bit @type is set in
 * drbd_enable_faults, and _drbd_insert_fault() returns nonzero; the checks
 * short-circuit in that order.
 */
static inline int
drbd_insert_fault(struct drbd_device *device, unsigned int type) {
#ifdef CONFIG_DRBD_FAULT_INJECTION
	/* unsigned shift: "1 << 31" on a signed int would be undefined */
	return drbd_fault_rate &&
		(drbd_enable_faults & (1U << type)) &&
		_drbd_insert_fault(device, type);
#else
	return 0;
#endif
}
 187
 188/* integer division, round _UP_ to the next integer.
  * NOTE: A and B are each evaluated twice; do not pass expressions with
  * side effects. */
 189#define div_ceil(A, B) ((A)/(B) + ((A)%(B) ? 1 : 0))
 190/* usual integer division */
 191#define div_floor(A, B) ((A)/(B))
 192
 
 
 
 
 
 193extern struct ratelimit_state drbd_ratelimit_state;
 194extern struct idr drbd_devices; /* RCU, updates: genl_lock() */
 195extern struct list_head drbd_resources; /* RCU, updates: genl_lock() */
 196
 197extern const char *cmdname(enum drbd_packet cmd);
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 198
 199/* Context for sending/receiving the bitmap,
 200 * possibly in some encoding scheme */
 201struct bm_xfer_ctx {
 202	/* "const"
 203	 * stores total bits and long words
 204	 * of the bitmap, so we don't need to
 205	 * call the accessor functions over and over again. */
 206	unsigned long bm_bits;
 207	unsigned long bm_words;
 208	/* during xfer, current position within the bitmap */
 209	unsigned long bit_offset;
 210	unsigned long word_offset;
 211
 212	/* statistics; index: (h->command == P_BITMAP),
	 * i.e. [1] counts P_BITMAP packets and [0] all others */
 213	unsigned packets[2];
 214	unsigned bytes[2];
 215};
 216
 217extern void INFO_bm_xfer_stats(struct drbd_device *device,
 218		const char *direction, struct bm_xfer_ctx *c);
 219
 220static inline void bm_xfer_ctx_bit_to_word_offset(struct bm_xfer_ctx *c)
 221{
 222	/* word_offset counts "native long words" (32 or 64 bit),
 223	 * aligned at 64 bit.
 224	 * Encoded packet may end at an unaligned bit offset.
 225	 * In case a fallback clear text packet is transmitted in
 226	 * between, we adjust this offset back to the last 64bit
 227	 * aligned "native long word", which makes coding and decoding
 228	 * the plain text bitmap much more convenient.  */
 229#if BITS_PER_LONG == 64
 230	c->word_offset = c->bit_offset >> 6;
 231#elif BITS_PER_LONG == 32
 232	c->word_offset = c->bit_offset >> 5;
 233	c->word_offset &= ~(1UL);
 234#else
 235# error "unsupported BITS_PER_LONG"
 236#endif
 237}
 238
 239extern unsigned int drbd_header_size(struct drbd_connection *connection);
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 240
 241/**********************************************************************/
 /* Values of the t_state member of struct drbd_thread. */
 242enum drbd_thread_state {
 243	NONE,
 244	RUNNING,
 245	EXITING,
 246	RESTARTING
 247};
 248
 /* Bookkeeping for one DRBD kernel thread: its task, a completion named
  * "stop", the current t_state, the function it is given (which receives
  * this struct), back pointers to resource and connection, and a name.
  * NOTE(review): t_lock presumably protects t_state and task -- confirm
  * against the thread start/stop code. */
 249struct drbd_thread {
 250	spinlock_t t_lock;
 251	struct task_struct *task;
 252	struct completion stop;
 253	enum drbd_thread_state t_state;
 254	int (*function) (struct drbd_thread *);
 255	struct drbd_resource *resource;
 256	struct drbd_connection *connection;
 257	int reset_cpu_mask;
 258	const char *name;
 259};
 260
 261static inline enum drbd_thread_state get_t_state(struct drbd_thread *thi)
 262{
 263	/* THINK testing the t_state seems to be uncritical in all cases
 264	 * (but thread_{start,stop}), so we can read it *without* the lock.
 265	 *	--lge */
 266
 267	smp_rmb();
 268	return thi->t_state;
 269}
 270
 
 
 /* A unit of work: a list node plus the callback to invoke.
  * The callback receives the work item itself and a "cancel" argument. */
 271struct drbd_work {
 272	struct list_head list;
 273	int (*cb)(struct drbd_work *, int cancel);
 274};
 275
 /* A drbd_work that additionally carries the device it refers to. */
 276struct drbd_device_work {
 277	struct drbd_work w;
 278	struct drbd_device *device;
 279};
 280
 281#include "drbd_interval.h"
 282
 283extern int drbd_wait_misc(struct drbd_device *, struct drbd_interval *);
 284
 285extern void lock_all_resources(void);
 286extern void unlock_all_resources(void);
 287
 /* Tracking structure for one request: ties the master bio to its local
  * clone (private_bio), its interval, its epoch, and the list nodes and
  * reference counts that control completion and destruction. */
 288struct drbd_request {
 289	struct drbd_work w;
 290	struct drbd_device *device;
 291
 292	/* if local IO is not allowed, will be NULL.
 293	 * if local IO _is_ allowed, holds the locally submitted bio clone,
 294	 * or, after local IO completion, the ERR_PTR(error).
 295	 * see drbd_request_endio(). */
 296	struct bio *private_bio;
 297
 298	struct drbd_interval i;
 
 299
 300	/* epoch: used to check on "completion" whether this req was in
 301	 * the current epoch, and we therefore have to close it,
 302	 * causing a p_barrier packet to be sent, starting a new epoch.
 303	 *
 304	 * This corresponds to "barrier" in struct p_barrier[_ack],
 305	 * and to "barrier_nr" in struct drbd_epoch (and various
 306	 * comments/function parameters/local variable names).
 307	 */
 308	unsigned int epoch;
 309
 310	struct list_head tl_requests; /* ring list in the transfer log */
 311	struct bio *master_bio;       /* master bio pointer */
 
 312
 313	/* see struct drbd_device */
 314	struct list_head req_pending_master_completion;
 315	struct list_head req_pending_local;
 316
 317	/* for generic IO accounting */
 318	unsigned long start_jif;
 319
 320	/* for DRBD internal statistics */
 321
 322	/* Minimal set of time stamps to determine if we wait for activity log
 323	 * transactions, local disk or peer.  32 bit "jiffies" are good enough,
 324	 * we don't expect a DRBD request to be stalled for several months.
 325	 */
 326
 327	/* before actual request processing */
 328	unsigned long in_actlog_jif;
 329
 330	/* local disk */
 331	unsigned long pre_submit_jif;
 332
 333	/* per connection */
 334	unsigned long pre_send_jif;
 335	unsigned long acked_jif;
 336	unsigned long net_done_jif;
 337
 338	/* Possibly even more detail to track each phase:
 339	 *  master_completion_jif
 340	 *      how long did it take to complete the master bio
 341	 *      (application visible latency)
 342	 *  allocated_jif
 343	 *      how long the master bio was blocked until we finally allocated
 344	 *      a tracking struct
 345	 *  in_actlog_jif
 346	 *      how long did we wait for activity log transactions
 347	 *
 348	 *  net_queued_jif
 349	 *      when did we finally queue it for sending
 350	 *  pre_send_jif
 351	 *      when did we start sending it
 352	 *  post_send_jif
 353	 *      how long did we block in the network stack trying to send it
 354	 *  acked_jif
 355	 *      when did we receive (or fake, in protocol A) a remote ACK
 356	 *  net_done_jif
 357	 *      when did we receive final acknowledgement (P_BARRIER_ACK),
 358	 *      or decide, e.g. on connection loss, that we do no longer expect
 359	 *      anything from this peer for this request.
 360	 *
 361	 *  pre_submit_jif
 362	 *  post_sub_jif
 363	 *      when did we start submitting to the lower level device,
 364	 *      and how long did we block in that submit function
 365	 *  local_completion_jif
 366	 *      how long did it take the lower level device to complete this request
 367	 */
 368
 369
 370	/* once it hits 0, we may complete the master_bio */
 371	atomic_t completion_ref;
 372	/* once it hits 0, we may destroy this drbd_request object */
 373	struct kref kref;
 374
 375	unsigned rq_state; /* see comments above _req_mod() */
 376};
 377
 /* One epoch as tracked per connection: list node, barrier number, and
  * counters of the requests that belong to it. */
 378struct drbd_epoch {
 379	struct drbd_connection *connection;
 380	struct list_head list;
 381	unsigned int barrier_nr;
 382	atomic_t epoch_size; /* increased on every request added. */
 383	atomic_t active;     /* increased on every req. added, and dec on every finished. */
 384	unsigned long flags; /* NOTE(review): presumably the DE_* flag bits -- confirm */
 385};
 386
 387/* Prototype declaration of function defined in drbd_receiver.c */
 388int drbdd_init(struct drbd_thread *);
 389int drbd_asender(struct drbd_thread *);
 390
 391/* drbd_epoch flag bits */
 392enum {
 393	DE_HAVE_BARRIER_NUMBER,
 394};
 395
 /* Events on an epoch.  EV_PUT, EV_GOT_BARRIER_NR and EV_BECAME_LAST take
  * the values 0..2; EV_CLEANUP (32) does not overlap their bits.
  * NOTE(review): EV_CLEANUP is presumably OR-ed onto one of the other
  * events -- confirm at the call sites. */
 396enum epoch_event {
 397	EV_PUT,
 398	EV_GOT_BARRIER_NR,
 399	EV_BECAME_LAST,
 400	EV_CLEANUP = 32, /* used as flag */
 401};
 402
 
 
 
 
 
 /* A digest buffer together with its size.
  * NOTE(review): digest_size is presumably in bytes -- confirm. */
 403struct digest_info {
 404	int digest_size;
 405	void *digest;
 406};
 407
 /* Tracking structure for one request that belongs to a peer device: the
  * pages holding its data, the number of bios still pending, its interval
  * and its flag bits. */
 408struct drbd_peer_request {
 409	struct drbd_work w;
 410	struct drbd_peer_device *peer_device;
 411	struct drbd_epoch *epoch; /* for writes */
 
 412	struct page *pages;
 413	atomic_t pending_bios;
 414	struct drbd_interval i;
 415	/* see comments on ee flag bits below */
 416	unsigned long flags;
 417	unsigned long submit_jif;
	/* either a block id or a pointer to a digest; per the ee flag comments,
	 * EE_HAS_DIGEST marks the case where the digest pointer is used */
 418	union {
 419		u64 block_id;
 420		struct digest_info *digest;
 421	};
 422};
 423
 424/* ee flag bits.
 425 * While corresponding bios are in flight, the only modification will be
 426 * set_bit WAS_ERROR, which has to be atomic.
 427 * If no bios are in flight yet, or all have been completed,
 428 * non-atomic modification to ee->flags is ok.
  *
  * The __EE_* enumerators are bit numbers; the matching EE_* macros are
  * the corresponding bit masks.
 429 */
 430enum {
 431	__EE_CALL_AL_COMPLETE_IO,
 432	__EE_MAY_SET_IN_SYNC,
 433
 434	/* is this a TRIM aka REQ_DISCARD? */
 435	__EE_IS_TRIM,
 436
 437	/* In case a barrier failed,
 438	 * we need to resubmit without the barrier flag. */
 439	__EE_RESUBMITTED,
 440
 441	/* we may have several bios per peer request.
 442	 * if any of those fail, we set this flag atomically
 443	 * from the endio callback */
 444	__EE_WAS_ERROR,
 445
 446	/* This ee has a pointer to a digest instead of a block id */
 447	__EE_HAS_DIGEST,
 448
 449	/* Conflicting local requests need to be restarted after this request */
 450	__EE_RESTART_REQUESTS,
 451
 452	/* The peer wants a write ACK for this (wire proto C) */
 453	__EE_SEND_WRITE_ACK,
 454
 455	/* Is set when net_conf had two_primaries set while creating this peer_req */
 456	__EE_IN_INTERVAL_TREE,
 457
 458	/* for debugfs: */
 459	/* has this been submitted, or does it still wait for something else? */
 460	__EE_SUBMITTED,
 461
 462	/* this is/was a write request */
 463	__EE_WRITE,
 464
 465	/* this is/was a write same request */
 466	__EE_WRITE_SAME,
 467
 468	/* this originates from application on peer
 469	 * (not some resync or verify or other DRBD internal request) */
 470	__EE_APPLICATION,
 471
 472	/* If it contains only 0 bytes, send back P_RS_DEALLOCATED */
 473	__EE_RS_THIN_REQ,
 474};
 /* Mask form (1 << bit number) of each __EE_* flag bit. */
 475#define EE_CALL_AL_COMPLETE_IO (1<<__EE_CALL_AL_COMPLETE_IO)
 476#define EE_MAY_SET_IN_SYNC     (1<<__EE_MAY_SET_IN_SYNC)
 477#define EE_IS_TRIM             (1<<__EE_IS_TRIM)
 478#define EE_RESUBMITTED         (1<<__EE_RESUBMITTED)
 479#define EE_WAS_ERROR           (1<<__EE_WAS_ERROR)
 480#define EE_HAS_DIGEST          (1<<__EE_HAS_DIGEST)
 481#define EE_RESTART_REQUESTS	(1<<__EE_RESTART_REQUESTS)
 482#define EE_SEND_WRITE_ACK	(1<<__EE_SEND_WRITE_ACK)
 483#define EE_IN_INTERVAL_TREE	(1<<__EE_IN_INTERVAL_TREE)
 484#define EE_SUBMITTED		(1<<__EE_SUBMITTED)
 485#define EE_WRITE		(1<<__EE_WRITE)
 486#define EE_WRITE_SAME		(1<<__EE_WRITE_SAME)
 487#define EE_APPLICATION		(1<<__EE_APPLICATION)
 488#define EE_RS_THIN_REQ		(1<<__EE_RS_THIN_REQ)
 489
 490/* flag bits per device
  * NOTE(review): presumably bit numbers for drbd_device.flags -- confirm. */
 491enum {
 
 492	UNPLUG_REMOTE,		/* sending a "UnplugRemote" could help */
 493	MD_DIRTY,		/* current uuids and flags not yet on disk */
 
 494	USE_DEGR_WFC_T,		/* degr-wfc-timeout instead of wfc-timeout. */
 
 495	CL_ST_CHG_SUCCESS,
 496	CL_ST_CHG_FAIL,
 497	CRASHED_PRIMARY,	/* This node was a crashed primary.
 498				 * Gets cleared when the state.conn
 499				 * goes into C_CONNECTED state. */
 
 500	CONSIDER_RESYNC,
 501
 502	MD_NO_FUA,		/* User wants us to not use FUA/FLUSH on meta data dev */
 503
 504	BITMAP_IO,		/* suspend application io;
 505				   once no more io in flight, start bitmap io */
 506	BITMAP_IO_QUEUED,       /* Started bitmap IO */
 507	WAS_IO_ERROR,		/* Local disk failed, returned IO error */
 508	WAS_READ_ERROR,		/* Local disk READ failed (set additionally to the above) */
 509	FORCE_DETACH,		/* Force-detach from local disk, aborting any pending local IO */
 510	RESYNC_AFTER_NEG,       /* Resync after online grow after the attach&negotiate finished. */
 
 511	RESIZE_PENDING,		/* Size change detected locally, waiting for the response from
 512				 * the peer, if it changed there as well. */
 
 513	NEW_CUR_UUID,		/* Create new current UUID when thawing IO */
 514	AL_SUSPENDED,		/* Activity logging is currently suspended. */
 515	AHEAD_TO_SYNC_SOURCE,   /* Ahead -> SyncSource queued */
 516	B_RS_H_DONE,		/* Before resync handler done (already executed) */
 517	DISCARD_MY_DATA,	/* discard_my_data flag per volume */
 518	READ_BALANCE_RR,
 519
 520	FLUSH_PENDING,		/* if set, device->flush_jif is when we submitted that flush
 521				 * from drbd_flush_after_epoch() */
 522
 523	/* cleared only after backing device related structures have been destroyed. */
 524	GOING_DISKLESS,		/* Disk is being detached, because of io-error, or admin request. */
 525
 526	/* to be used in drbd_device_post_work() */
 527	GO_DISKLESS,		/* tell worker to schedule cleanup before detach */
 528	DESTROY_DISK,		/* tell worker to close backing devices and destroy related structures. */
 529	MD_SYNC,		/* tell worker to call drbd_md_sync() */
 530	RS_START,		/* tell worker to start resync/OV */
 531	RS_PROGRESS,		/* tell worker that resync made significant progress */
 532	RS_DONE,		/* tell worker that resync is done */
 533};
 534
 535struct drbd_bitmap; /* opaque for drbd_device */
 536
 537/* definition of bits in bm_flags to be used in drbd_bm_lock
 538 * and drbd_bitmap_io and friends. */
 539enum bm_flag {
 
 540	/* currently locked for bulk operation */
	/* (0xf is the union of the four single-bit flags below) */
 541	BM_LOCKED_MASK = 0xf,
 542
 543	/* in detail, that is: */
 544	BM_DONT_CLEAR = 0x1,
 545	BM_DONT_SET   = 0x2,
 546	BM_DONT_TEST  = 0x4,
 547
 548	/* so we can mark it locked for bulk operation,
 549	 * and still allow all non-bulk operations */
 550	BM_IS_LOCKED  = 0x8,
 551
 552	/* (test bit, count bit) allowed (common case) */
 553	BM_LOCKED_TEST_ALLOWED = BM_DONT_CLEAR | BM_DONT_SET | BM_IS_LOCKED,
 554
 555	/* testing bits, as well as setting new bits allowed, but clearing bits
 556	 * would be unexpected.  Used during bitmap receive.  Setting new bits
 557	 * requires sending of "out-of-sync" information, though. */
 558	BM_LOCKED_SET_ALLOWED = BM_DONT_CLEAR | BM_IS_LOCKED,
 559
 560	/* for drbd_bm_write_copy_pages, everything is allowed,
 561	 * only concurrent bulk operations are locked out. */
 562	BM_LOCKED_CHANGE_ALLOWED = BM_IS_LOCKED,
 563};
 564
 
 
 
 
 
 
 
 /* A list of queued work with its lock and a wait queue.
  * NOTE(review): q presumably links struct drbd_work items via their
  * list member -- confirm. */
 565struct drbd_work_queue {
 566	struct list_head q;
 
 567	spinlock_t q_lock;  /* to protect the list. */
 568	wait_queue_head_t q_wait;
 569};
 570
 /* One socket of a connection together with a mutex and preallocated
  * send/receive buffers. */
 571struct drbd_socket {
 
 572	struct mutex mutex;
 573	struct socket    *socket;
 574	/* this way we get our
 575	 * send/receive buffers off the stack */
 576	void *sbuf;
 577	void *rbuf;
 578};
 579
 /* Meta data description kept per backing device (embedded as "md" in
  * struct drbd_backing_dev): location and size of the meta data area,
  * UUIDs and flags, and the activity log / bitmap layout. */
 580struct drbd_md {
 581	u64 md_offset;		/* sector offset to 'super' block */
 582
 583	u64 la_size_sect;	/* last agreed size, unit sectors */
 584	spinlock_t uuid_lock;
 585	u64 uuid[UI_SIZE];
 586	u64 device_uuid;
 587	u32 flags;
 588	u32 md_size_sect;
 589
 590	s32 al_offset;	/* signed relative sector offset to activity log */
 591	s32 bm_offset;	/* signed relative sector offset to bitmap */
 592
 593	/* cached value of bdev->disk_conf->meta_dev_idx (see below) */
 594	s32 meta_dev_idx;
 
 595
 596	/* see al_tr_number_to_on_disk_sector() */
 597	u32 al_stripes;
 598	u32 al_stripe_size_4k;
 599	u32 al_size_4k; /* cached product of the above */
 600};
 
 
 601
 /* Local backing storage of a device: the data block device, the meta
  * data block device, the meta data description and the disk
  * configuration. */
 602struct drbd_backing_dev {
 603	struct block_device *backing_bdev;
 604	struct block_device *md_bdev;
 605	struct drbd_md md;
 606	struct disk_conf *disk_conf; /* RCU, for updates: resource->conf_update */
 607	sector_t known_size; /* last known size of that backing device */
 608};
 609
 /* State of a meta data IO buffer page; embedded as md_io in
  * struct drbd_device. */
 610struct drbd_md_io {
 611	struct page *page;
 612	unsigned long start_jif;	/* last call to drbd_md_get_buffer */
 613	unsigned long submit_jif;	/* last _drbd_md_sync_page_io() submit */
 614	const char *current_use;
 615	atomic_t in_use;
 616	unsigned int done;
 617	int error;
 618};
 619
 /* Work item describing a bitmap IO operation on a device.
  * NOTE(review): presumably io_fn performs the IO and done is then called
  * with its return value (rv), with why as a description and flags as
  * the bitmap lock flags to use -- confirm against the bitmap IO code. */
 620struct bm_io_work {
 621	struct drbd_work w;
 622	char *why;
 623	enum bm_flag flags;
 624	int (*io_fn)(struct drbd_device *device);
 625	void (*done)(struct drbd_device *device, int rv);
 
 626};
 627
 /*
  * FIFO of int values stored inline after the header.
  *
  * values is a C99 flexible array member (instead of the deprecated
  * zero-length array), so sizeof(struct fifo_buffer) covers the header only
  * and the storage for the values has to be allocated in addition.
  */
struct fifo_buffer {
	unsigned int head_index;
	unsigned int size;
	int total; /* sum of all values */
	int values[];
};
/* Allocator for a fifo_buffer; takes the requested fifo size. */
extern struct fifo_buffer *fifo_alloc(int fifo_size);
 635
 636/* flag bits per connection
  * NOTE(review): presumably bit numbers for drbd_connection.flags --
  * confirm. */
 637enum {
 638	NET_CONGESTED,		/* The data socket is congested */
 639	RESOLVE_CONFLICTS,	/* Set on one node, cleared on the peer! */
 640	SEND_PING,
 641	GOT_PING_ACK,		/* set when we receive a ping_ack packet, ping_wait gets woken */
 642	CONN_WD_ST_CHG_REQ,	/* A cluster wide state change on the connection is active */
 643	CONN_WD_ST_CHG_OKAY,
 644	CONN_WD_ST_CHG_FAIL,
 645	CONN_DRY_RUN,		/* Expect disconnect after resync handshake. */
 646	CREATE_BARRIER,		/* next P_DATA is preceded by a P_BARRIER */
 647	STATE_SENT,		/* Do not change state/UUIDs while this is set */
 648	CALLBACK_PENDING,	/* Whether we have a call_usermodehelper(, UMH_WAIT_PROC)
 649				 * pending, from drbd worker context.
 650				 * If set, bdi_write_congested() returns true,
 651				 * so shrink_page_list() would not recurse into,
 652				 * and potentially deadlock on, this drbd worker.
 653				 */
 654	DISCONNECT_SENT,
 655
 656	DEVICE_WORK_PENDING,	/* tell worker that some device has pending work */
 657};
 658
 /* Selector for a state: OLD is an alias of NOW (both 0), NEW is 1. */
 659enum which_state { NOW, OLD = NOW, NEW };
 660
 /* A DRBD resource: a named, reference counted object that owns the
  * volume-number-to-device mapping and the list of connections, plus the
  * locks and options shared by them. */
 661struct drbd_resource {
 662	char *name;
 663#ifdef CONFIG_DEBUG_FS
 664	struct dentry *debugfs_res;
 665	struct dentry *debugfs_res_volumes;
 666	struct dentry *debugfs_res_connections;
 667	struct dentry *debugfs_res_in_flight_summary;
 668#endif
 669	struct kref kref;
 670	struct idr devices;		/* volume number to device mapping */
 671	struct list_head connections;
 672	struct list_head resources;
 673	struct res_opts res_opts;
 674	struct mutex conf_update;	/* mutex for read-copy-update of net_conf and disk_conf */
 675	struct mutex adm_mutex;		/* mutex to serialize administrative requests */
 676	spinlock_t req_lock;
 677
 678	unsigned susp:1;		/* IO suspended by user */
 679	unsigned susp_nod:1;		/* IO suspended because no data */
 680	unsigned susp_fen:1;		/* IO suspended because fence peer handler runs */
 681
 682	enum write_ordering_e write_ordering;
 683
 684	cpumask_var_t cpu_mask;
 685};
 686
 /* One entry of a per-thread callback history.  struct drbd_connection
  * keeps DRBD_THREAD_DETAILS_HIST of these for the worker and for the
  * receiver; they are passed to __update_timing_details(). */
 687struct drbd_thread_timing_details
 688{
 689	unsigned long start_jif;
 690	void *cb_addr;
 691	const char *caller_fn;
 692	unsigned int line;
 693	unsigned int cb_nr;
 694};
 695
 /* A connection of a resource to a peer: addresses, the data and meta
  * sockets, negotiated protocol parameters, crypto transforms, epoch
  * tracking, the threads serving it, and sender-side bookkeeping. */
 696struct drbd_connection {
 697	struct list_head connections;
 698	struct drbd_resource *resource;
 699#ifdef CONFIG_DEBUG_FS
 700	struct dentry *debugfs_conn;
 701	struct dentry *debugfs_conn_callback_history;
 702	struct dentry *debugfs_conn_oldest_requests;
 703#endif
 704	struct kref kref;
 705	struct idr peer_devices;	/* volume number to peer device mapping */
 706	enum drbd_conns cstate;		/* Only C_STANDALONE to C_WF_REPORT_PARAMS */
 707	struct mutex cstate_mutex;	/* Protects graceful disconnects */
 708	unsigned int connect_cnt;	/* Inc each time a connection is established */
 709
 710	unsigned long flags;
 711	struct net_conf *net_conf;	/* content protected by rcu */
 712	wait_queue_head_t ping_wait;	/* Woken upon reception of a ping, and a state change */
 713
 714	struct sockaddr_storage my_addr;
 715	int my_addr_len;
 716	struct sockaddr_storage peer_addr;
 717	int peer_addr_len;
 718
 719	struct drbd_socket data;	/* data/barrier/cstate/parameter packets */
 720	struct drbd_socket meta;	/* ping/ack (metadata) packets */
 721	int agreed_pro_version;		/* actually used protocol version */
 722	u32 agreed_features;
 723	unsigned long last_received;	/* in jiffies, either socket */
 724	unsigned int ko_count;
 725
 726	struct list_head transfer_log;	/* all requests not yet fully processed */
 727
 728	struct crypto_shash *cram_hmac_tfm;
 729	struct crypto_ahash *integrity_tfm;  /* checksums we compute, updates protected by connection->data.mutex */
 730	struct crypto_ahash *peer_integrity_tfm;  /* checksums we verify, only accessed from receiver thread  */
 731	struct crypto_ahash *csums_tfm;
 732	struct crypto_ahash *verify_tfm;
 733	void *int_dig_in;
 734	void *int_dig_vv;
 735
 736	/* receiver side */
 737	struct drbd_epoch *current_epoch;
 738	spinlock_t epoch_lock;
 739	unsigned int epochs;
 740	atomic_t current_tle_nr;	/* transfer log epoch number */
 741	unsigned current_tle_writes;	/* writes seen within this tl epoch */
 742
 743	unsigned long last_reconnect_jif;
 744	/* empty member on older kernels without blk_start_plug() */
 745	struct blk_plug receiver_plug;
 746	struct drbd_thread receiver;
 747	struct drbd_thread worker;
 748	struct drbd_thread ack_receiver;
 749	struct workqueue_struct *ack_sender;
 750
 751	/* cached pointers,
 752	 * so we can look up the oldest pending requests more quickly.
 753	 * protected by resource->req_lock */
 754	struct drbd_request *req_next; /* DRBD 9: todo.req_next */
 755	struct drbd_request *req_ack_pending;
 756	struct drbd_request *req_not_net_done;
 757
 758	/* sender side */
 759	struct drbd_work_queue sender_work;
 760
	/* callback history of worker (w_*) and receiver (r_*) */
 761#define DRBD_THREAD_DETAILS_HIST	16
 762	unsigned int w_cb_nr; /* keeps counting up */
 763	unsigned int r_cb_nr; /* keeps counting up */
 764	struct drbd_thread_timing_details w_timing_details[DRBD_THREAD_DETAILS_HIST];
 765	struct drbd_thread_timing_details r_timing_details[DRBD_THREAD_DETAILS_HIST];
 766
 767	struct {
 768		unsigned long last_sent_barrier_jif;
 769
 770		/* whether this sender thread
 771		 * has processed a single write yet. */
 772		bool seen_any_write_yet;
 773
 774		/* Which barrier number to send with the next P_BARRIER */
 775		int current_epoch_nr;
 776
 777		/* how many write requests have been sent
 778		 * with req->epoch == current_epoch_nr.
 779		 * If none, no P_BARRIER will be sent. */
 780		unsigned current_epoch_writes;
 781	} send;
 782};
 783
 784static inline bool has_net_conf(struct drbd_connection *connection)
 785{
 786	bool has_net_conf;
 787
 788	rcu_read_lock();
 789	has_net_conf = rcu_dereference(connection->net_conf);
 790	rcu_read_unlock();
 791
 792	return has_net_conf;
 793}
 794
 795void __update_timing_details(
 796		struct drbd_thread_timing_details *tdp,
 797		unsigned int *cb_nr,
 798		void *cb,
 799		const char *fn, const unsigned int line);
 800
 /*
  * Record callback cb in the worker (w_*) resp. receiver (r_*) timing
  * history of connection c, tagging the entry with the calling function
  * and line.  c is parenthesized so that any pointer-valued expression
  * (e.g. "conns + 1") may be passed.
  */
#define update_worker_timing_details(c, cb) \
	__update_timing_details((c)->w_timing_details, &(c)->w_cb_nr, cb, __func__ , __LINE__ )
#define update_receiver_timing_details(c, cb) \
	__update_timing_details((c)->r_timing_details, &(c)->r_cb_nr, cb, __func__ , __LINE__ )
 805
 /* A workqueue, its work item, and the list of writes it processes;
  * embedded as "submit" in struct drbd_device. */
 806struct submit_worker {
 807	struct workqueue_struct *wq;
 808	struct work_struct worker;
 809
 810	/* protected by ..->resource->req_lock */
 811	struct list_head writes;
 812};
 813
 /* Links one device to one connection.  The peer_devices member is the
  * node on device->peer_devices (see first_peer_device()). */
 814struct drbd_peer_device {
 815	struct list_head peer_devices;
 816	struct drbd_device *device;
 817	struct drbd_connection *connection;
 818	struct work_struct send_acks_work;
 819#ifdef CONFIG_DEBUG_FS
 820	struct dentry *debugfs_peer_dev;
 821#endif
 822};
 823
 /* Per-volume (per-minor) state of DRBD: backing storage, the exposed
  * block device, counters, resync and online-verify progress, the bitmap
  * and activity log, and the lists of requests in their various stages. */
 824struct drbd_device {
 825	struct drbd_resource *resource;
 826	struct list_head peer_devices;
 827	struct list_head pending_bitmap_io;
 828
 829	unsigned long flush_jif;
 830#ifdef CONFIG_DEBUG_FS
 831	struct dentry *debugfs_minor;
 832	struct dentry *debugfs_vol;
 833	struct dentry *debugfs_vol_oldest_requests;
 834	struct dentry *debugfs_vol_act_log_extents;
 835	struct dentry *debugfs_vol_resync_extents;
 836	struct dentry *debugfs_vol_data_gen_id;
 837	struct dentry *debugfs_vol_ed_gen_id;
 838#endif
 839
 840	unsigned int vnr;	/* volume number within the connection */
 841	unsigned int minor;	/* device minor number */
 842
 843	struct kref kref;
 844
 
 845	/* things that are stored as / read from meta data on disk */
	/* NOTE(review): flags presumably holds the per-device flag bits
	 * (UNPLUG_REMOTE, MD_DIRTY, ...) -- confirm */
 846	unsigned long flags;
 847
 848	/* configured by drbdsetup */
 
 849	struct drbd_backing_dev *ldev __protected_by(local);
 850
 851	sector_t p_size;     /* partner's disk size */
 852	struct request_queue *rq_queue;
 853	struct block_device *this_bdev;
 854	struct gendisk	    *vdisk;
 855
 856	unsigned long last_reattach_jif;
 857	struct drbd_work resync_work;
 858	struct drbd_work unplug_work;
 
 859	struct timer_list resync_timer;
 860	struct timer_list md_sync_timer;
 861	struct timer_list start_resync_timer;
 862	struct timer_list request_timer;
 
 863
 864	/* Used after attach while negotiating new disk state. */
 865	union drbd_state new_state_tmp;
 866
 867	union drbd_dev_state state;
 868	wait_queue_head_t misc_wait;
 869	wait_queue_head_t state_wait;  /* upon each state change. */
 
 870	unsigned int send_cnt;
 871	unsigned int recv_cnt;
 872	unsigned int read_cnt;
 873	unsigned int writ_cnt;
 874	unsigned int al_writ_cnt;
 875	unsigned int bm_writ_cnt;
 876	atomic_t ap_bio_cnt;	 /* Requests we need to complete */
 877	atomic_t ap_actlog_cnt;  /* Requests waiting for activity log */
 878	atomic_t ap_pending_cnt; /* AP data packets on the wire, ack expected */
 879	atomic_t rs_pending_cnt; /* RS request/data packets on the wire */
 880	atomic_t unacked_cnt;	 /* Need to send replies for */
 881	atomic_t local_cnt;	 /* Waiting for local completion */
 882	atomic_t suspend_cnt;
 
 883
 884	/* Interval tree of pending local requests */
 885	struct rb_root read_requests;
 886	struct rb_root write_requests;
 887
 888	/* for statistics and timeouts */
 889	/* [0] read, [1] write */
 890	struct list_head pending_master_completion[2];
 891	struct list_head pending_completion[2];
 892
 893	/* use checksums for *this* resync */
 894	bool use_csums;
 895	/* blocks to resync in this run [unit BM_BLOCK_SIZE] */
 896	unsigned long rs_total;
 897	/* number of resync blocks that failed in this run */
 898	unsigned long rs_failed;
 899	/* Syncer's start time [unit jiffies] */
 900	unsigned long rs_start;
 901	/* cumulated time in PausedSyncX state [unit jiffies] */
 902	unsigned long rs_paused;
 903	/* skipped because csum was equal [unit BM_BLOCK_SIZE] */
 904	unsigned long rs_same_csum;
 905#define DRBD_SYNC_MARKS 8
 906#define DRBD_SYNC_MARK_STEP (3*HZ)
 907	/* block not up-to-date at mark [unit BM_BLOCK_SIZE] */
 908	unsigned long rs_mark_left[DRBD_SYNC_MARKS];
 909	/* mark's time [unit jiffies] */
 910	unsigned long rs_mark_time[DRBD_SYNC_MARKS];
 911	/* current index into rs_mark_{left,time} */
 912	int rs_last_mark;
 913	unsigned long rs_last_bcast; /* [unit jiffies] */
 914
 915	/* where does the admin want us to start? (sector) */
 916	sector_t ov_start_sector;
 917	sector_t ov_stop_sector;
 918	/* where are we now? (sector) */
 919	sector_t ov_position;
 920	/* Start sector of out of sync range (to merge printk reporting). */
 921	sector_t ov_last_oos_start;
 922	/* size of out-of-sync range in sectors. */
 923	sector_t ov_last_oos_size;
 924	unsigned long ov_left; /* in bits */
 
 925
 
 926	struct drbd_bitmap *bitmap;
 927	unsigned long bm_resync_fo; /* bit offset for drbd_bm_find_next */
 928
 929	/* Used to track operations of resync... */
 930	struct lru_cache *resync;
 931	/* Number of locked elements in resync LRU */
 932	unsigned int resync_locked;
 933	/* resync extent number waiting for application requests */
 934	unsigned int resync_wenr;
 935
 936	int open_cnt;
 937	u64 *p_uuid;
 938
 
 939	struct list_head active_ee; /* IO in progress (P_DATA gets written to disk) */
 940	struct list_head sync_ee;   /* IO in progress (P_RS_DATA_REPLY gets written to disk) */
 941	struct list_head done_ee;   /* need to send P_WRITE_ACK */
 942	struct list_head read_ee;   /* [RS]P_DATA_REQUEST being read */
 943	struct list_head net_ee;    /* zero-copy network send in progress */
 
 944
 945	int next_barrier_nr;
 
 946	struct list_head resync_reads;
 947	atomic_t pp_in_use;		/* allocated from page pool */
 948	atomic_t pp_in_use_by_net;	/* sendpage()d, still referenced by tcp */
 949	wait_queue_head_t ee_wait;
 950	struct drbd_md_io md_io;
 
 951	spinlock_t al_lock;
 952	wait_queue_head_t al_wait;
 953	struct lru_cache *act_log;	/* activity log */
 954	unsigned int al_tr_number;
 955	int al_tr_cycle;
 
 956	wait_queue_head_t seq_wait;
 957	atomic_t packet_seq;
 958	unsigned int peer_seq;
 959	spinlock_t peer_seq_lock;
 
 960	unsigned long comm_bm_set; /* communicated number of set bits. */
 
 961	struct bm_io_work bm_io_work;
 962	u64 ed_uuid; /* UUID of the exposed data */
 963	struct mutex own_state_mutex;
 964	struct mutex *state_mutex; /* either own_state_mutex or first_peer_device(device)->connection->cstate_mutex */
 965	char congestion_reason;  /* Why we were congested... */
 966	atomic_t rs_sect_in; /* for incoming resync data rate, SyncTarget */
 967	atomic_t rs_sect_ev; /* for submitted resync data rate, both */
 968	int rs_last_sect_ev; /* counter to compare with */
 969	int rs_last_events;  /* counter of read or write "events" (unit sectors)
 970			      * on the lower level device when we last looked. */
 971	int c_sync_rate; /* current resync rate after syncer throttle magic */
	/* NOTE(review): struct drbd_connection has no visible conn_update
	 * member; resource->conf_update is possibly what is meant below */
 972	struct fifo_buffer *rs_plan_s; /* correction values of resync planner (RCU, connection->conn_update) */
 973	int rs_in_flight; /* resync sectors in flight (to proxy, in proxy and from proxy) */
 
 974	atomic_t ap_in_flight; /* App sectors in flight (waiting for ack) */
 975	unsigned int peer_max_bio_size;
 976	unsigned int local_max_bio_size;
 977
 978	/* any requests that would block in drbd_make_request()
 979	 * are deferred to this single-threaded work queue */
 980	struct submit_worker submit;
 981};
 982
 983struct drbd_bm_aio_ctx {
 984	struct drbd_device *device;
 985	struct list_head list; /* on device->pending_bitmap_io */;
 986	unsigned long start_jif;
 987	atomic_t in_flight;
 988	unsigned int done;
 989	unsigned flags;
 990#define BM_AIO_COPY_PAGES	1
 991#define BM_AIO_WRITE_HINTED	2
 992#define BM_AIO_WRITE_ALL_PAGES	4
 993#define BM_AIO_READ		8
 994	int error;
 995	struct kref kref;
 996};
 997
 /* Per-request state gathered while handling one configuration request. */
struct drbd_config_context {
	unsigned int minor;		/* taken from the drbd_genlmsghdr */
	unsigned int volume;		/* taken from the request attributes, if present */
#define VOLUME_UNSPECIFIED		(-1U)

	/*
	 * The next three point into the request skb
	 * and are only valid for a limited lifetime!
	 */
	char *resource_name;
	struct nlattr *my_addr;
	struct nlattr *peer_addr;

	struct sk_buff *reply_skb;		/* reply buffer */
	struct drbd_genlmsghdr *reply_dh;	/* points into the reply buffer */

	/* objects resolved from the attributes, if possible */
	struct drbd_device *device;
	struct drbd_resource *resource;
	struct drbd_connection *connection;
};
1019
1020static inline struct drbd_device *minor_to_device(unsigned int minor)
1021{
1022	return (struct drbd_device *)idr_find(&drbd_devices, minor);
1023}
1024
1025static inline struct drbd_peer_device *first_peer_device(struct drbd_device *device)
1026{
1027	return list_first_entry_or_null(&device->peer_devices, struct drbd_peer_device, peer_devices);
1028}
1029
1030static inline struct drbd_peer_device *
1031conn_peer_device(struct drbd_connection *connection, int volume_number)
 
 
 
 
 
 
 
1032{
1033	return idr_find(&connection->peer_devices, volume_number);
 
 
 
 
 
 
 
1034}
1035
/*
 * Iterate over the resources linked on the list head @head through
 * their "resources" member: plain, RCU and removal-safe flavours.
 */
#define for_each_resource(res, head) \
	list_for_each_entry(res, head, resources)

#define for_each_resource_rcu(res, head) \
	list_for_each_entry_rcu(res, head, resources)

#define for_each_resource_safe(res, next, head) \
	list_for_each_entry_safe(res, next, head, resources)
1044
/*
 * Iterate over the connections linked on resource->connections: plain,
 * RCU and removal-safe flavours.
 *
 * @resource is parenthesized in the expansion so that any pointer-valued
 * expression (for example "*pp") can be passed, not only a plain
 * identifier; without the parentheses "&*pp->connections" would bind as
 * "&*(pp->connections)".
 */
#define for_each_connection(connection, resource) \
	list_for_each_entry(connection, &(resource)->connections, connections)

#define for_each_connection_rcu(connection, resource) \
	list_for_each_entry_rcu(connection, &(resource)->connections, connections)

#define for_each_connection_safe(connection, tmp, resource) \
	list_for_each_entry_safe(connection, tmp, &(resource)->connections, connections)
1053
/*
 * Iterate over the peer devices linked on device->peer_devices: plain,
 * RCU and removal-safe flavours.
 *
 * @device is parenthesized in the expansion so that any pointer-valued
 * expression (for example "*pp") can be passed, not only a plain
 * identifier.
 */
#define for_each_peer_device(peer_device, device) \
	list_for_each_entry(peer_device, &(device)->peer_devices, peer_devices)

#define for_each_peer_device_rcu(peer_device, device) \
	list_for_each_entry_rcu(peer_device, &(device)->peer_devices, peer_devices)

#define for_each_peer_device_safe(peer_device, tmp, device) \
	list_for_each_entry_safe(peer_device, tmp, &(device)->peer_devices, peer_devices)
1062
1063static inline unsigned int device_to_minor(struct drbd_device *device)
1064{
1065	return device->minor;
1066}
1067
1068/*
1069 * function declarations
1070 *************************/
1071
1072/* drbd_main.c */
1073
 
 
 
 
 
 
 
 
/* Flag bits accepted by drbd_send_sizes() and drbd_determine_dev_size(). */
enum dds_flags {
	DDSF_FORCED    = 1 << 0,
	DDSF_NO_RESYNC = 1 << 1, /* Do not run a resync for the new space */
};
1078
1079extern void drbd_init_set_defaults(struct drbd_device *device);
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1080extern int  drbd_thread_start(struct drbd_thread *thi);
1081extern void _drbd_thread_stop(struct drbd_thread *thi, int restart, int wait);
1082#ifdef CONFIG_SMP
1083extern void drbd_thread_current_set_cpu(struct drbd_thread *thi);
 
1084#else
1085#define drbd_thread_current_set_cpu(A) ({})
 
1086#endif
1087extern void tl_release(struct drbd_connection *, unsigned int barrier_nr,
 
1088		       unsigned int set_size);
1089extern void tl_clear(struct drbd_connection *);
1090extern void drbd_free_sock(struct drbd_connection *connection);
1091extern int drbd_send(struct drbd_connection *connection, struct socket *sock,
1092		     void *buf, size_t size, unsigned msg_flags);
1093extern int drbd_send_all(struct drbd_connection *, struct socket *, void *, size_t,
1094			 unsigned);
1095
1096extern int __drbd_send_protocol(struct drbd_connection *connection, enum drbd_packet cmd);
1097extern int drbd_send_protocol(struct drbd_connection *connection);
1098extern int drbd_send_uuids(struct drbd_peer_device *);
1099extern int drbd_send_uuids_skip_initial_sync(struct drbd_peer_device *);
1100extern void drbd_gen_and_send_sync_uuid(struct drbd_peer_device *);
1101extern int drbd_send_sizes(struct drbd_peer_device *, int trigger_reply, enum dds_flags flags);
1102extern int drbd_send_state(struct drbd_peer_device *, union drbd_state s);
1103extern int drbd_send_current_state(struct drbd_peer_device *);
1104extern int drbd_send_sync_param(struct drbd_peer_device *);
1105extern void drbd_send_b_ack(struct drbd_connection *connection, u32 barrier_nr,
1106			    u32 set_size);
1107extern int drbd_send_ack(struct drbd_peer_device *, enum drbd_packet,
1108			 struct drbd_peer_request *);
1109extern void drbd_send_ack_rp(struct drbd_peer_device *, enum drbd_packet,
1110			     struct p_block_req *rp);
1111extern void drbd_send_ack_dp(struct drbd_peer_device *, enum drbd_packet,
1112			     struct p_data *dp, int data_size);
1113extern int drbd_send_ack_ex(struct drbd_peer_device *, enum drbd_packet,
 
 
 
 
 
 
 
1114			    sector_t sector, int blksize, u64 block_id);
1115extern int drbd_send_out_of_sync(struct drbd_peer_device *, struct drbd_request *);
1116extern int drbd_send_block(struct drbd_peer_device *, enum drbd_packet,
1117			   struct drbd_peer_request *);
1118extern int drbd_send_dblock(struct drbd_peer_device *, struct drbd_request *req);
1119extern int drbd_send_drequest(struct drbd_peer_device *, int cmd,
1120			      sector_t sector, int size, u64 block_id);
1121extern int drbd_send_drequest_csum(struct drbd_peer_device *, sector_t sector,
1122				   int size, void *digest, int digest_size,
1123				   enum drbd_packet cmd);
1124extern int drbd_send_ov_request(struct drbd_peer_device *, sector_t sector, int size);
1125
1126extern int drbd_send_bitmap(struct drbd_device *device);
1127extern void drbd_send_sr_reply(struct drbd_peer_device *, enum drbd_state_rv retcode);
1128extern void conn_send_sr_reply(struct drbd_connection *connection, enum drbd_state_rv retcode);
1129extern int drbd_send_rs_deallocated(struct drbd_peer_device *, struct drbd_peer_request *);
1130extern void drbd_backing_dev_free(struct drbd_device *device, struct drbd_backing_dev *ldev);
1131extern void drbd_device_cleanup(struct drbd_device *device);
1132extern void drbd_print_uuids(struct drbd_device *device, const char *text);
1133extern void drbd_queue_unplug(struct drbd_device *device);
1134
1135extern void conn_md_sync(struct drbd_connection *connection);
1136extern void drbd_md_write(struct drbd_device *device, void *buffer);
1137extern void drbd_md_sync(struct drbd_device *device);
1138extern int  drbd_md_read(struct drbd_device *device, struct drbd_backing_dev *bdev);
1139extern void drbd_uuid_set(struct drbd_device *device, int idx, u64 val) __must_hold(local);
1140extern void _drbd_uuid_set(struct drbd_device *device, int idx, u64 val) __must_hold(local);
1141extern void drbd_uuid_new_current(struct drbd_device *device) __must_hold(local);
1142extern void drbd_uuid_set_bm(struct drbd_device *device, u64 val) __must_hold(local);
1143extern void drbd_uuid_move_history(struct drbd_device *device) __must_hold(local);
1144extern void __drbd_uuid_set(struct drbd_device *device, int idx, u64 val) __must_hold(local);
1145extern void drbd_md_set_flag(struct drbd_device *device, int flags) __must_hold(local);
1146extern void drbd_md_clear_flag(struct drbd_device *device, int flags)__must_hold(local);
1147extern int drbd_md_test_flag(struct drbd_backing_dev *, int);
1148extern void drbd_md_mark_dirty(struct drbd_device *device);
1149extern void drbd_queue_bitmap_io(struct drbd_device *device,
1150				 int (*io_fn)(struct drbd_device *),
1151				 void (*done)(struct drbd_device *, int),
 
 
 
 
 
 
1152				 char *why, enum bm_flag flags);
1153extern int drbd_bitmap_io(struct drbd_device *device,
1154		int (*io_fn)(struct drbd_device *),
1155		char *why, enum bm_flag flags);
1156extern int drbd_bitmap_io_from_worker(struct drbd_device *device,
1157		int (*io_fn)(struct drbd_device *),
1158		char *why, enum bm_flag flags);
1159extern int drbd_bmio_set_n_write(struct drbd_device *device) __must_hold(local);
1160extern int drbd_bmio_clear_n_write(struct drbd_device *device) __must_hold(local);
1161
1162/* Meta data layout
1163 *
1164 * We currently have two possible layouts.
1165 * Offsets in (512 byte) sectors.
1166 * external:
1167 *   |----------- md_size_sect ------------------|
1168 *   [ 4k superblock ][ activity log ][  Bitmap  ]
1169 *   | al_offset == 8 |
1170 *   | bm_offset = al_offset + X      |
1171 *  ==> bitmap sectors = md_size_sect - bm_offset
1172 *
1173 *  Variants:
1174 *     old, indexed fixed size meta data:
1175 *
1176 * internal:
1177 *            |----------- md_size_sect ------------------|
1178 * [data.....][  Bitmap  ][ activity log ][ 4k superblock ][padding*]
1179 *                        | al_offset < 0 |
1180 *            | bm_offset = al_offset - Y |
1181 *  ==> bitmap sectors = Y = al_offset - bm_offset
1182 *
1183 *  [padding*] are zero or up to 7 unused 512 Byte sectors to the
1184 *  end of the device, so that the [4k superblock] will be 4k aligned.
1185 *
1186 *  The activity log consists of 4k transaction blocks,
1187 *  which are written in a ring-buffer, or striped ring-buffer like fashion,
1188 *  the size of which used to be fixed at 32kB,
1189 *  but is about to become configurable.
1190 */
1191
/*
 * The old, fixed size meta data layout allows up to about 3.8TB;
 * for anything larger the "flexible" meta data format is required.
 *
 * All three values are in units of 512 byte sectors.
 */
#define MD_128MB_SECT	(128LLU << 11)	/* 128 MB */
#define MD_4kB_SECT	8		/*   4 kB */
#define MD_32kB_SECT	64		/*  32 kB */
1198
/* Each activity log extent covers 4M (1 << 22 bytes) of storage. */
#define AL_EXTENT_SHIFT	22
#define AL_EXTENT_SIZE	(1 << AL_EXTENT_SHIFT)
1202
1203/* We could make these currently hardcoded constants configurable
1204 * variables at create-md time (or even re-configurable at runtime?).
1205 * Which will require some more changes to the DRBD "super block"
1206 * and attach code.
1207 *
1208 * updates per transaction:
1209 *   This many changes to the active set can be logged with one transaction.
1210 *   This number is arbitrary.
1211 * context per transaction:
1212 *   This many context extent numbers are logged with each transaction.
1213 *   This number is resulting from the transaction block size (4k), the layout
1214 *   of the transaction header, and the number of updates per transaction.
1215 *   See drbd_actlog.c:struct al_transaction_on_disk
1216 * */
#define AL_UPDATES_PER_TRANSACTION	 64	/* arbitrary */
#define AL_CONTEXT_PER_TRANSACTION	919	/* (4096 - 36 - 6*64)/4 */
1219
1220#if BITS_PER_LONG == 32
1221#define LN2_BPL 5
1222#define cpu_to_lel(A) cpu_to_le32(A)
1223#define lel_to_cpu(A) le32_to_cpu(A)
1224#elif BITS_PER_LONG == 64
1225#define LN2_BPL 6
1226#define cpu_to_lel(A) cpu_to_le64(A)
1227#define lel_to_cpu(A) le64_to_cpu(A)
1228#else
1229#error "LN2 of BITS_PER_LONG unknown!"
1230#endif
1231
1232/* resync bitmap */
1233/* 16MB sized 'bitmap extent' to track syncer usage */
1234struct bm_extent {
1235	int rs_left; /* number of bits set (out of sync) in this extent. */
1236	int rs_failed; /* number of failed resync requests in this extent. */
1237	unsigned long flags;
1238	struct lc_element lce;
1239};
1240
1241#define BME_NO_WRITES  0  /* bm_extent.flags: no more requests on this one! */
1242#define BME_LOCKED     1  /* bm_extent.flags: syncer active on this one. */
1243#define BME_PRIORITY   2  /* finish resync IO on this extent ASAP! App IO waiting! */
1244
1245/* drbd_bitmap.c */
1246/*
1247 * We need to store one bit for a block.
1248 * Example: 1GB disk @ 4096 byte blocks ==> we need 32 KB bitmap.
1249 * Bit 0 ==> local node thinks this block is binary identical on both nodes
1250 * Bit 1 ==> local node thinks this block needs to be synced.
1251 */
1252
1253#define SLEEP_TIME (HZ/10)
1254
/*
 * Bitmap IO is done in units of 4k blocks, and one bitmap bit still
 * has a hardcoded relation to 4k of storage.
 */
#define BM_BLOCK_SHIFT	12			/* 4k per bit */
#define BM_BLOCK_SIZE	(1 << BM_BLOCK_SHIFT)

/*
 * The size represented by one bitmap extent (aka resync extent) is set,
 * mostly arbitrarily, to 16 MiB; at 4k per bit resolution that is also
 * 512 Byte worth of bitmap.
 */
#define BM_EXT_SHIFT	24			/* 16 MiB per resync extent */
#define BM_EXT_SIZE	(1 << BM_EXT_SHIFT)

#if (BM_EXT_SHIFT != 24) || (BM_BLOCK_SHIFT != 12)
#error "HAVE YOU FIXED drbdmeta AS WELL??"
#endif
1268
/* conversion between _storage_ sectors and the bitmap bit describing them */
#define BM_SECT_TO_BIT(x)   ((x) >> (BM_BLOCK_SHIFT - 9))
#define BM_BIT_TO_SECT(x)   ((sector_t)(x) << (BM_BLOCK_SHIFT - 9))
/* this many _storage_ sectors are described by one bit */
#define BM_SECT_PER_BIT     BM_BIT_TO_SECT(1)

/* number of bits to the kilo bytes of storage they represent */
#define Bit2KB(bits) ((bits) << (BM_BLOCK_SHIFT - 10))

/* the _bitmap_ extent in which a given _storage_ sector,
 * respectively a given bit, is located */
#define BM_SECT_TO_EXT(x)   ((x) >> (BM_EXT_SHIFT - 9))
#define BM_BIT_TO_EXT(x)    ((x) >> (BM_EXT_SHIFT - BM_BLOCK_SHIFT))

/* first storage sector a bitmap extent corresponds to */
#define BM_EXT_TO_SECT(x)   ((sector_t)(x) << (BM_EXT_SHIFT - 9))
/* how many _storage_ sectors we have per bitmap extent */
#define BM_SECT_PER_EXT     BM_EXT_TO_SECT(1)
/* how many bits are covered by one bitmap extent (resync extent) */
#define BM_BITS_PER_EXT     (1UL << (BM_EXT_SHIFT - BM_BLOCK_SHIFT))

#define BM_BLOCKS_PER_BM_EXT_MASK  (BM_BITS_PER_EXT - 1)


/* in one sector of the bitmap, we have this many activity_log extents. */
#define AL_EXT_PER_BM_SECT  (1 << (BM_EXT_SHIFT - AL_EXTENT_SHIFT))
 
 
 
 
1294
1295/* the extent in "PER_EXTENT" below is an activity log extent
1296 * we need that many (long words/bytes) to store the bitmap
1297 *		     of one AL_EXTENT_SIZE chunk of storage.
1298 * we can store the bitmap for that many AL_EXTENTS within
1299 * one sector of the _on_disk_ bitmap:
1300 * bit	 0	  bit 37   bit 38	     bit (512*8)-1
1301 *	     ...|........|........|.. // ..|........|
1302 * sect. 0	 `296	  `304			   ^(512*8*8)-1
1303 *
1304#define BM_WORDS_PER_EXT    ( (AL_EXT_SIZE/BM_BLOCK_SIZE) / BITS_PER_LONG )
1305#define BM_BYTES_PER_EXT    ( (AL_EXT_SIZE/BM_BLOCK_SIZE) / 8 )  // 128
1306#define BM_EXT_PER_SECT	    ( 512 / BM_BYTES_PER_EXTENT )	 //   4
1307 */
1308
#define DRBD_MAX_SECTORS_32 (0xffffffffLU)

/*
 * One meta data variant has a fixed on-disk size of 128 MiB.  Of that,
 * 4k are our "superblock" and 32k are the fixed size activity log;
 * DRBD_MAX_SECTORS_FIXED_BM is derived from the sectors left over for
 * the bitmap.
 */
#define DRBD_MAX_SECTORS_FIXED_BM \
	  ((MD_128MB_SECT - MD_32kB_SECT - MD_4kB_SECT) * (1LL<<(BM_EXT_SHIFT-9)))

#if !defined(CONFIG_LBDAF) && BITS_PER_LONG == 32
/* without CONFIG_LBDAF on 32bit, both limits are the 32 bit sector limit */
#define DRBD_MAX_SECTORS      DRBD_MAX_SECTORS_32
#define DRBD_MAX_SECTORS_FLEX DRBD_MAX_SECTORS_32
#else
#define DRBD_MAX_SECTORS      DRBD_MAX_SECTORS_FIXED_BM
#if BITS_PER_LONG == 32
/* 16 TB in units of sectors,
 * adjusted by one page worth of bitmap,
 * so we won't wrap around in drbd_bm_find_next_bit.
 * you should use 64bit OS for that much storage, anyways. */
#define DRBD_MAX_SECTORS_FLEX BM_BIT_TO_SECT(0xffff7fff)
#else
/* limit on 64bit architecture with "flexible" meta data.
 * NOTE(review): the previous comment here said "up to 1 PiB" and
 * "corresponds to (1UL << 38) bits", but (1UL << 51) sectors of 512 byte
 * is 1 EiB, i.e. (1UL << 48) bits at 4k per bit -- confirm which of the
 * two is intended. */
#define DRBD_MAX_SECTORS_FLEX (1UL << 51)
#endif
#endif
1334
1335/* Estimate max bio size as 256 * PAGE_SIZE,
1336 * so for typical PAGE_SIZE of 4k, that is (1<<20) Byte.
1337 * Since we may live in a mixed-platform cluster,
1338 * we limit us to a platform agnostic constant here for now.
1339 * A followup commit may allow even bigger BIO sizes,
1340 * once we thought that through. */
1341#define DRBD_MAX_BIO_SIZE (1U << 20)
1342#if DRBD_MAX_BIO_SIZE > (BIO_MAX_PAGES << PAGE_SHIFT)
1343#error Architecture not supported: DRBD_MAX_BIO_SIZE > BIO_MAX_SIZE
1344#endif
1345#define DRBD_MAX_BIO_SIZE_SAFE (1U << 12)       /* Works always = 4k */
1346
1347#define DRBD_MAX_SIZE_H80_PACKET (1U << 15) /* Header 80 only allows packets up to 32KiB data */
1348#define DRBD_MAX_BIO_SIZE_P95    (1U << 17) /* Protocol 95 to 99 allows bios up to 128KiB */
1349
1350/* For now, don't allow more than half of what we can "activate" in one
1351 * activity log transaction to be discarded in one go. We may need to rework
1352 * drbd_al_begin_io() to allow for even larger discard ranges */
1353#define DRBD_MAX_BATCH_BIO_SIZE	 (AL_UPDATES_PER_TRANSACTION/2*AL_EXTENT_SIZE)
1354#define DRBD_MAX_BBIO_SECTORS    (DRBD_MAX_BATCH_BIO_SIZE >> 9)
1355
1356extern int  drbd_bm_init(struct drbd_device *device);
1357extern int  drbd_bm_resize(struct drbd_device *device, sector_t sectors, int set_new_bits);
1358extern void drbd_bm_cleanup(struct drbd_device *device);
1359extern void drbd_bm_set_all(struct drbd_device *device);
1360extern void drbd_bm_clear_all(struct drbd_device *device);
1361/* set/clear/test only a few bits at a time */
1362extern int  drbd_bm_set_bits(
1363		struct drbd_device *device, unsigned long s, unsigned long e);
1364extern int  drbd_bm_clear_bits(
1365		struct drbd_device *device, unsigned long s, unsigned long e);
1366extern int drbd_bm_count_bits(
1367	struct drbd_device *device, const unsigned long s, const unsigned long e);
1368/* bm_set_bits variant for use while holding drbd_bm_lock,
1369 * may process the whole bitmap in one go */
1370extern void _drbd_bm_set_bits(struct drbd_device *device,
1371		const unsigned long s, const unsigned long e);
1372extern int  drbd_bm_test_bit(struct drbd_device *device, unsigned long bitnr);
1373extern int  drbd_bm_e_weight(struct drbd_device *device, unsigned long enr);
1374extern int  drbd_bm_read(struct drbd_device *device) __must_hold(local);
1375extern void drbd_bm_mark_for_writeout(struct drbd_device *device, int page_nr);
1376extern int  drbd_bm_write(struct drbd_device *device) __must_hold(local);
1377extern void drbd_bm_reset_al_hints(struct drbd_device *device) __must_hold(local);
1378extern int  drbd_bm_write_hinted(struct drbd_device *device) __must_hold(local);
1379extern int  drbd_bm_write_lazy(struct drbd_device *device, unsigned upper_idx) __must_hold(local);
1380extern int drbd_bm_write_all(struct drbd_device *device) __must_hold(local);
1381extern int  drbd_bm_write_copy_pages(struct drbd_device *device) __must_hold(local);
1382extern size_t	     drbd_bm_words(struct drbd_device *device);
1383extern unsigned long drbd_bm_bits(struct drbd_device *device);
1384extern sector_t      drbd_bm_capacity(struct drbd_device *device);
1385
1386#define DRBD_END_OF_BITMAP	(~(unsigned long)0)
1387extern unsigned long drbd_bm_find_next(struct drbd_device *device, unsigned long bm_fo);
1388/* bm_find_next variants for use while you hold drbd_bm_lock() */
1389extern unsigned long _drbd_bm_find_next(struct drbd_device *device, unsigned long bm_fo);
1390extern unsigned long _drbd_bm_find_next_zero(struct drbd_device *device, unsigned long bm_fo);
1391extern unsigned long _drbd_bm_total_weight(struct drbd_device *device);
1392extern unsigned long drbd_bm_total_weight(struct drbd_device *device);
 
1393/* for receive_bitmap */
1394extern void drbd_bm_merge_lel(struct drbd_device *device, size_t offset,
1395		size_t number, unsigned long *buffer);
1396/* for _drbd_send_bitmap */
1397extern void drbd_bm_get_lel(struct drbd_device *device, size_t offset,
1398		size_t number, unsigned long *buffer);
1399
1400extern void drbd_bm_lock(struct drbd_device *device, char *why, enum bm_flag flags);
1401extern void drbd_bm_unlock(struct drbd_device *device);
1402/* drbd_main.c */
1403
1404extern struct kmem_cache *drbd_request_cache;
1405extern struct kmem_cache *drbd_ee_cache;	/* peer requests */
1406extern struct kmem_cache *drbd_bm_ext_cache;	/* bitmap extents */
1407extern struct kmem_cache *drbd_al_ext_cache;	/* activity log extents */
1408extern mempool_t *drbd_request_mempool;
1409extern mempool_t *drbd_ee_mempool;
1410
1411/* drbd's page pool, used to buffer data received from the peer,
1412 * or data requested by the peer.
1413 *
1414 * This does not have an emergency reserve.
1415 *
1416 * When allocating from this pool, it first takes pages from the pool.
1417 * Only if the pool is depleted will it try to allocate from the system.
1418 *
1419 * The assumption is that pages taken from this pool will be processed,
1420 * and given back, "quickly", and then can be recycled, so we can avoid
1421 * frequent calls to alloc_page(), and still will be able to make progress even
1422 * under memory pressure.
1423 */
1424extern struct page *drbd_pp_pool;
1425extern spinlock_t   drbd_pp_lock;
1426extern int	    drbd_pp_vacant;
1427extern wait_queue_head_t drbd_pp_wait;
1428
1429/* We also need a standard (emergency-reserve backed) page pool
1430 * for meta data IO (activity log, bitmap).
1431 * We can keep it global, as long as it is used as "N pages at a time".
1432 * 128 should be plenty, currently we probably can get away with as few as 1.
1433 */
1434#define DRBD_MIN_POOL_PAGES	128
1435extern mempool_t *drbd_md_io_page_pool;
1436
1437/* We also need to make sure we get a bio
1438 * when we need it for housekeeping purposes */
1439extern struct bio_set *drbd_md_io_bio_set;
1440/* to allocate from that set */
1441extern struct bio *bio_alloc_drbd(gfp_t gfp_mask);
1442
1443/* And a bio_set for cloning */
1444extern struct bio_set *drbd_io_bio_set;
1445
1446extern struct mutex resources_mutex;
1447
1448extern int conn_lowest_minor(struct drbd_connection *connection);
1449extern enum drbd_ret_code drbd_create_device(struct drbd_config_context *adm_ctx, unsigned int minor);
1450extern void drbd_destroy_device(struct kref *kref);
1451extern void drbd_delete_device(struct drbd_device *device);
1452
1453extern struct drbd_resource *drbd_create_resource(const char *name);
1454extern void drbd_free_resource(struct drbd_resource *resource);
1455
1456extern int set_resource_options(struct drbd_resource *resource, struct res_opts *res_opts);
1457extern struct drbd_connection *conn_create(const char *name, struct res_opts *res_opts);
1458extern void drbd_destroy_connection(struct kref *kref);
1459extern struct drbd_connection *conn_get_by_addrs(void *my_addr, int my_addr_len,
1460					    void *peer_addr, int peer_addr_len);
1461extern struct drbd_resource *drbd_find_resource(const char *name);
1462extern void drbd_destroy_resource(struct kref *kref);
1463extern void conn_free_crypto(struct drbd_connection *connection);
1464
1465/* drbd_req */
1466extern void do_submit(struct work_struct *ws);
1467extern void __drbd_make_request(struct drbd_device *, struct bio *, unsigned long);
1468extern blk_qc_t drbd_make_request(struct request_queue *q, struct bio *bio);
1469extern int drbd_read_remote(struct drbd_device *device, struct drbd_request *req);
1470extern int is_valid_ar_handle(struct drbd_request *, sector_t);
1471
1472
1473/* drbd_nl.c */
1474
1475extern struct mutex notification_mutex;
1476
1477extern void drbd_suspend_io(struct drbd_device *device);
1478extern void drbd_resume_io(struct drbd_device *device);
1479extern char *ppsize(char *buf, unsigned long long size);
1480extern sector_t drbd_new_dev_size(struct drbd_device *, struct drbd_backing_dev *, sector_t, int);
/*
 * Result codes of drbd_determine_dev_size(); the enumerators are
 * consecutive, running from -3 up to 3.
 */
enum determine_dev_size {
	DS_ERROR_SHRINK = -3,
	DS_ERROR_SPACE_MD,	/* -2 */
	DS_ERROR,		/* -1 */
	DS_UNCHANGED,		/*  0 */
	DS_SHRUNK,		/*  1 */
	DS_GREW,		/*  2 */
	DS_GREW_FROM_ZERO,	/*  3 */
};
1490extern enum determine_dev_size
1491drbd_determine_dev_size(struct drbd_device *, enum dds_flags, struct resize_parms *) __must_hold(local);
1492extern void resync_after_online_grow(struct drbd_device *);
1493extern void drbd_reconsider_queue_parameters(struct drbd_device *device,
1494			struct drbd_backing_dev *bdev, struct o_qlim *o);
1495extern enum drbd_state_rv drbd_set_role(struct drbd_device *device,
1496					enum drbd_role new_role,
1497					int force);
1498extern bool conn_try_outdate_peer(struct drbd_connection *connection);
1499extern void conn_try_outdate_peer_async(struct drbd_connection *connection);
1500extern enum drbd_peer_state conn_khelper(struct drbd_connection *connection, char *cmd);
1501extern int drbd_khelper(struct drbd_device *device, char *cmd);
1502
1503/* drbd_worker.c */
1504/* bi_end_io handlers */
1505extern void drbd_md_endio(struct bio *bio);
1506extern void drbd_peer_request_endio(struct bio *bio);
1507extern void drbd_request_endio(struct bio *bio);
1508extern int drbd_worker(struct drbd_thread *thi);
1509enum drbd_ret_code drbd_resync_after_valid(struct drbd_device *device, int o_minor);
1510void drbd_resync_after_changed(struct drbd_device *device);
1511extern void drbd_start_resync(struct drbd_device *device, enum drbd_conns side);
1512extern void resume_next_sg(struct drbd_device *device);
1513extern void suspend_other_sg(struct drbd_device *device);
1514extern int drbd_resync_finished(struct drbd_device *device);
1515/* maybe rather drbd_main.c ? */
1516extern void *drbd_md_get_buffer(struct drbd_device *device, const char *intent);
1517extern void drbd_md_put_buffer(struct drbd_device *device);
1518extern int drbd_md_sync_page_io(struct drbd_device *device,
1519		struct drbd_backing_dev *bdev, sector_t sector, int op);
1520extern void drbd_ov_out_of_sync_found(struct drbd_device *, sector_t, int);
1521extern void wait_until_done_or_force_detached(struct drbd_device *device,
1522		struct drbd_backing_dev *bdev, unsigned int *done);
1523extern void drbd_rs_controller_reset(struct drbd_device *device);
1524
1525static inline void ov_out_of_sync_print(struct drbd_device *device)
1526{
1527	if (device->ov_last_oos_size) {
1528		drbd_err(device, "Out of sync: start=%llu, size=%lu (sectors)\n",
1529		     (unsigned long long)device->ov_last_oos_start,
1530		     (unsigned long)device->ov_last_oos_size);
1531	}
1532	device->ov_last_oos_size = 0;
1533}
1534
1535
1536extern void drbd_csum_bio(struct crypto_ahash *, struct bio *, void *);
1537extern void drbd_csum_ee(struct crypto_ahash *, struct drbd_peer_request *, void *);
1538/* worker callbacks */
1539extern int w_e_end_data_req(struct drbd_work *, int);
1540extern int w_e_end_rsdata_req(struct drbd_work *, int);
1541extern int w_e_end_csum_rs_req(struct drbd_work *, int);
1542extern int w_e_end_ov_reply(struct drbd_work *, int);
1543extern int w_e_end_ov_req(struct drbd_work *, int);
1544extern int w_ov_finished(struct drbd_work *, int);
1545extern int w_resync_timer(struct drbd_work *, int);
1546extern int w_send_write_hint(struct drbd_work *, int);
1547extern int w_send_dblock(struct drbd_work *, int);
1548extern int w_send_read_req(struct drbd_work *, int);
1549extern int w_e_reissue(struct drbd_work *, int);
1550extern int w_restart_disk_io(struct drbd_work *, int);
1551extern int w_send_out_of_sync(struct drbd_work *, int);
1552extern int w_start_resync(struct drbd_work *, int);
1553
1554extern void resync_timer_fn(struct timer_list *t);
1555extern void start_resync_timer_fn(struct timer_list *t);
 
 
1556
1557extern void drbd_endio_write_sec_final(struct drbd_peer_request *peer_req);
 
1558
1559/* drbd_receiver.c */
1560extern int drbd_receiver(struct drbd_thread *thi);
1561extern int drbd_ack_receiver(struct drbd_thread *thi);
1562extern void drbd_send_ping_wf(struct work_struct *ws);
1563extern void drbd_send_acks_wf(struct work_struct *ws);
1564extern bool drbd_rs_c_min_rate_throttle(struct drbd_device *device);
1565extern bool drbd_rs_should_slow_down(struct drbd_device *device, sector_t sector,
1566		bool throttle_if_app_is_waiting);
1567extern int drbd_submit_peer_request(struct drbd_device *,
1568				    struct drbd_peer_request *, const unsigned,
1569				    const unsigned, const int);
1570extern int drbd_free_peer_reqs(struct drbd_device *, struct list_head *);
1571extern struct drbd_peer_request *drbd_alloc_peer_req(struct drbd_peer_device *, u64,
1572						     sector_t, unsigned int,
1573						     unsigned int,
1574						     gfp_t) __must_hold(local);
1575extern void __drbd_free_peer_req(struct drbd_device *, struct drbd_peer_request *,
1576				 int);
/*
 * Convenience wrappers around __drbd_free_peer_req(), passing 0
 * respectively 1 as its third argument.
 */
#define drbd_free_peer_req(dev, req)		__drbd_free_peer_req(dev, req, 0)
#define drbd_free_net_peer_req(dev, req)	__drbd_free_peer_req(dev, req, 1)
1579extern struct page *drbd_alloc_pages(struct drbd_peer_device *, unsigned int, bool);
1580extern void drbd_set_recv_tcq(struct drbd_device *device, int tcq_enabled);
1581extern void _drbd_clear_done_ee(struct drbd_device *device, struct list_head *to_be_freed);
1582extern int drbd_connected(struct drbd_peer_device *);
 
 
 
 
 
 
 
 
 
 
 
 
1583
1584static inline void drbd_tcp_cork(struct socket *sock)
1585{
1586	int val = 1;
1587	(void) kernel_setsockopt(sock, SOL_TCP, TCP_CORK,
1588			(char*)&val, sizeof(val));
1589}
1590
1591static inline void drbd_tcp_uncork(struct socket *sock)
1592{
1593	int val = 0;
1594	(void) kernel_setsockopt(sock, SOL_TCP, TCP_CORK,
1595			(char*)&val, sizeof(val));
1596}
1597
1598static inline void drbd_tcp_nodelay(struct socket *sock)
1599{
1600	int val = 1;
1601	(void) kernel_setsockopt(sock, SOL_TCP, TCP_NODELAY,
1602			(char*)&val, sizeof(val));
1603}
1604
1605static inline void drbd_tcp_quickack(struct socket *sock)
1606{
1607	int val = 2;
1608	(void) kernel_setsockopt(sock, SOL_TCP, TCP_QUICKACK,
1609			(char*)&val, sizeof(val));
1610}
1611
1612/* sets the number of 512 byte sectors of our virtual device */
1613static inline void drbd_set_my_capacity(struct drbd_device *device,
1614					sector_t size)
1615{
1616	/* set_capacity(device->this_bdev->bd_disk, size); */
1617	set_capacity(device->vdisk, size);
1618	device->this_bdev->bd_inode->i_size = (loff_t)size << 9;
1619}
1620
1621/*
1622 * used to submit our private bio
1623 */
1624static inline void drbd_generic_make_request(struct drbd_device *device,
1625					     int fault_type, struct bio *bio)
1626{
1627	__release(local);
1628	if (!bio->bi_disk) {
1629		drbd_err(device, "drbd_generic_make_request: bio->bi_disk == NULL\n");
1630		bio->bi_status = BLK_STS_IOERR;
1631		bio_endio(bio);
1632		return;
1633	}
1634
1635	if (drbd_insert_fault(device, fault_type))
1636		bio_io_error(bio);
1637	else
1638		generic_make_request(bio);
1639}
1640
1641void drbd_bump_write_ordering(struct drbd_resource *resource, struct drbd_backing_dev *bdev,
1642			      enum write_ordering_e wo);
1643
1644/* drbd_proc.c */
1645extern struct proc_dir_entry *drbd_proc;
1646extern const struct file_operations drbd_proc_fops;
 
 
1647
1648/* drbd_actlog.c */
1649extern bool drbd_al_begin_io_prepare(struct drbd_device *device, struct drbd_interval *i);
1650extern int drbd_al_begin_io_nonblock(struct drbd_device *device, struct drbd_interval *i);
1651extern void drbd_al_begin_io_commit(struct drbd_device *device);
1652extern bool drbd_al_begin_io_fastpath(struct drbd_device *device, struct drbd_interval *i);
1653extern void drbd_al_begin_io(struct drbd_device *device, struct drbd_interval *i);
1654extern void drbd_al_complete_io(struct drbd_device *device, struct drbd_interval *i);
1655extern void drbd_rs_complete_io(struct drbd_device *device, sector_t sector);
1656extern int drbd_rs_begin_io(struct drbd_device *device, sector_t sector);
1657extern int drbd_try_rs_begin_io(struct drbd_device *device, sector_t sector);
1658extern void drbd_rs_cancel_all(struct drbd_device *device);
1659extern int drbd_rs_del_all(struct drbd_device *device);
1660extern void drbd_rs_failed_io(struct drbd_device *device,
1661		sector_t sector, int size);
1662extern void drbd_advance_rs_marks(struct drbd_device *device, unsigned long still_to_go);
 
 
 
 
 
 
 
 
 
 
 
1663
1664enum update_sync_bits_mode { RECORD_RS_FAILED, SET_OUT_OF_SYNC, SET_IN_SYNC };
1665extern int __drbd_change_sync(struct drbd_device *device, sector_t sector, int size,
1666		enum update_sync_bits_mode mode);
1667#define drbd_set_in_sync(device, sector, size) \
1668	__drbd_change_sync(device, sector, size, SET_IN_SYNC)
1669#define drbd_set_out_of_sync(device, sector, size) \
1670	__drbd_change_sync(device, sector, size, SET_OUT_OF_SYNC)
1671#define drbd_rs_failed_io(device, sector, size) \
1672	__drbd_change_sync(device, sector, size, RECORD_RS_FAILED)
1673extern void drbd_al_shrink(struct drbd_device *device);
1674extern int drbd_al_initialize(struct drbd_device *, void *);
1675
1676/* drbd_nl.c */
1677/* state info broadcast */
1678struct sib_info {
1679	enum drbd_state_info_bcast_reason sib_reason;
1680	union {
1681		struct {
1682			char *helper_name;
1683			unsigned helper_exit_code;
1684		};
1685		struct {
1686			union drbd_state os;
1687			union drbd_state ns;
1688		};
1689	};
1690};
1691void drbd_bcast_event(struct drbd_device *device, const struct sib_info *sib);
1692
/*
 * State change notifications, one per object kind.  Each takes an sk_buff,
 * an unsigned int, the object, its *_info structure and the notification type.
 */
void notify_resource_state(struct sk_buff *, unsigned int,
			   struct drbd_resource *, struct resource_info *,
			   enum drbd_notification_type);
void notify_device_state(struct sk_buff *, unsigned int,
			 struct drbd_device *, struct device_info *,
			 enum drbd_notification_type);
void notify_connection_state(struct sk_buff *, unsigned int,
			     struct drbd_connection *, struct connection_info *,
			     enum drbd_notification_type);
void notify_peer_device_state(struct sk_buff *, unsigned int,
			      struct drbd_peer_device *,
			      struct peer_device_info *,
			      enum drbd_notification_type);
void notify_helper(enum drbd_notification_type, struct drbd_device *,
		   struct drbd_connection *, const char *, int);
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1715
1716/*
1717 * inline helper functions
1718 *************************/
1719
/*
 * Pages are chained through their private field: the next page of a chain
 * is stored in page_private().
 * See also page_chain_add and friends in drbd_receiver.c.
 */
static inline struct page *page_chain_next(struct page *page)
{
	struct page *next = (struct page *)page_private(page);

	return next;
}
/*
 * Walk a page chain, advancing @page itself; the successor is prefetched
 * before the loop body runs.
 */
#define page_chain_for_each(page)					\
	for (; page && ({ prefetch(page_chain_next(page)); 1; });	\
	     page = page_chain_next(page))

/*
 * Same walk, but the successor is saved in @n before the loop body runs,
 * and @page is advanced from @n afterwards.
 */
#define page_chain_for_each_safe(page, n)				\
	for (; page && ({ n = page_chain_next(page); 1; });		\
	     page = n)
1730
 
 
 
 
 
 
 
 
 
 
 
 
1731
1732static inline int drbd_peer_req_has_active_page(struct drbd_peer_request *peer_req)
1733{
1734	struct page *page = peer_req->pages;
1735	page_chain_for_each(page) {
1736		if (page_count(page) > 1)
1737			return 1;
1738	}
1739	return 0;
1740}
1741
1742static inline union drbd_state drbd_read_state(struct drbd_device *device)
 
 
 
 
 
 
 
1743{
1744	struct drbd_resource *resource = device->resource;
1745	union drbd_state rv;
 
 
 
 
 
 
 
1746
1747	rv.i = device->state.i;
1748	rv.susp = resource->susp;
1749	rv.susp_nod = resource->susp_nod;
1750	rv.susp_fen = resource->susp_fen;
1751
1752	return rv;
1753}
1754
/* Kind of failure reported to the io-error handling helpers. */
enum drbd_force_detach_flags {
	DRBD_READ_ERROR,	/* local read failed */
	DRBD_WRITE_ERROR,	/* local write failed */
	DRBD_META_IO_ERROR,	/* meta data IO failed */
	DRBD_FORCE_DETACH,	/* detach requested, not an IO error as such */
};
 
 
 
 
 
 
 
 
 
 
1761
1762#define __drbd_chk_io_error(m,f) __drbd_chk_io_error_(m,f, __func__)
1763static inline void __drbd_chk_io_error_(struct drbd_device *device,
1764		enum drbd_force_detach_flags df,
1765		const char *where)
1766{
1767	enum drbd_io_error_p ep;
1768
1769	rcu_read_lock();
1770	ep = rcu_dereference(device->ldev->disk_conf)->on_io_error;
1771	rcu_read_unlock();
1772	switch (ep) {
1773	case EP_PASS_ON: /* FIXME would this be better named "Ignore"? */
1774		if (df == DRBD_READ_ERROR || df == DRBD_WRITE_ERROR) {
1775			if (__ratelimit(&drbd_ratelimit_state))
1776				drbd_err(device, "Local IO failed in %s.\n", where);
1777			if (device->state.disk > D_INCONSISTENT)
1778				_drbd_set_state(_NS(device, disk, D_INCONSISTENT), CS_HARD, NULL);
1779			break;
1780		}
1781		/* NOTE fall through for DRBD_META_IO_ERROR or DRBD_FORCE_DETACH */
1782	case EP_DETACH:
1783	case EP_CALL_HELPER:
1784		/* Remember whether we saw a READ or WRITE error.
1785		 *
1786		 * Recovery of the affected area for WRITE failure is covered
1787		 * by the activity log.
1788		 * READ errors may fall outside that area though. Certain READ
1789		 * errors can be "healed" by writing good data to the affected
1790		 * blocks, which triggers block re-allocation in lower layers.
1791		 *
1792		 * If we can not write the bitmap after a READ error,
1793		 * we may need to trigger a full sync (see w_go_diskless()).
1794		 *
1795		 * Force-detach is not really an IO error, but rather a
1796		 * desperate measure to try to deal with a completely
1797		 * unresponsive lower level IO stack.
1798		 * Still it should be treated as a WRITE error.
1799		 *
1800		 * Meta IO error is always WRITE error:
1801		 * we read meta data only once during attach,
1802		 * which will fail in case of errors.
1803		 */
1804		set_bit(WAS_IO_ERROR, &device->flags);
1805		if (df == DRBD_READ_ERROR)
1806			set_bit(WAS_READ_ERROR, &device->flags);
1807		if (df == DRBD_FORCE_DETACH)
1808			set_bit(FORCE_DETACH, &device->flags);
1809		if (device->state.disk > D_FAILED) {
1810			_drbd_set_state(_NS(device, disk, D_FAILED), CS_HARD, NULL);
1811			drbd_err(device,
1812				"Local IO failed in %s. Detaching...\n", where);
1813		}
1814		break;
1815	}
1816}
1817
1818/**
1819 * drbd_chk_io_error: Handle the on_io_error setting, should be called from all io completion handlers
1820 * @device:	 DRBD device.
1821 * @error:	 Error code passed to the IO completion callback
1822 * @forcedetach: Force detach. I.e. the error happened while accessing the meta data
1823 *
1824 * See also drbd_main.c:after_state_ch() if (os.disk > D_FAILED && ns.disk == D_FAILED)
1825 */
1826#define drbd_chk_io_error(m,e,f) drbd_chk_io_error_(m,e,f, __func__)
1827static inline void drbd_chk_io_error_(struct drbd_device *device,
1828	int error, enum drbd_force_detach_flags forcedetach, const char *where)
1829{
1830	if (error) {
1831		unsigned long flags;
1832		spin_lock_irqsave(&device->resource->req_lock, flags);
1833		__drbd_chk_io_error_(device, forcedetach, where);
1834		spin_unlock_irqrestore(&device->resource->req_lock, flags);
1835	}
1836}
1837
1838
1839/**
1840 * drbd_md_first_sector() - Returns the first sector number of the meta data area
1841 * @bdev:	Meta data block device.
1842 *
1843 * BTW, for internal meta data, this happens to be the maximum capacity
1844 * we could agree upon with our peer node.
1845 */
1846static inline sector_t drbd_md_first_sector(struct drbd_backing_dev *bdev)
1847{
1848	switch (bdev->md.meta_dev_idx) {
1849	case DRBD_MD_INDEX_INTERNAL:
1850	case DRBD_MD_INDEX_FLEX_INT:
1851		return bdev->md.md_offset + bdev->md.bm_offset;
1852	case DRBD_MD_INDEX_FLEX_EXT:
1853	default:
1854		return bdev->md.md_offset;
1855	}
1856}
1857
1858/**
1859 * drbd_md_last_sector() - Return the last sector number of the meta data area
1860 * @bdev:	Meta data block device.
1861 */
1862static inline sector_t drbd_md_last_sector(struct drbd_backing_dev *bdev)
1863{
1864	switch (bdev->md.meta_dev_idx) {
1865	case DRBD_MD_INDEX_INTERNAL:
1866	case DRBD_MD_INDEX_FLEX_INT:
1867		return bdev->md.md_offset + MD_4kB_SECT -1;
1868	case DRBD_MD_INDEX_FLEX_EXT:
1869	default:
1870		return bdev->md.md_offset + bdev->md.md_size_sect -1;
1871	}
1872}
1873
1874/* Returns the number of 512 byte sectors of the device */
1875static inline sector_t drbd_get_capacity(struct block_device *bdev)
1876{
1877	/* return bdev ? get_capacity(bdev->bd_disk) : 0; */
1878	return bdev ? i_size_read(bdev->bd_inode) >> 9 : 0;
1879}
1880
1881/**
1882 * drbd_get_max_capacity() - Returns the capacity we announce to out peer
1883 * @bdev:	Meta data block device.
1884 *
1885 * returns the capacity we announce to out peer.  we clip ourselves at the
1886 * various MAX_SECTORS, because if we don't, current implementation will
1887 * oops sooner or later
1888 */
1889static inline sector_t drbd_get_max_capacity(struct drbd_backing_dev *bdev)
1890{
1891	sector_t s;
1892
1893	switch (bdev->md.meta_dev_idx) {
1894	case DRBD_MD_INDEX_INTERNAL:
1895	case DRBD_MD_INDEX_FLEX_INT:
1896		s = drbd_get_capacity(bdev->backing_bdev)
1897			? min_t(sector_t, DRBD_MAX_SECTORS_FLEX,
1898				drbd_md_first_sector(bdev))
1899			: 0;
1900		break;
1901	case DRBD_MD_INDEX_FLEX_EXT:
1902		s = min_t(sector_t, DRBD_MAX_SECTORS_FLEX,
1903				drbd_get_capacity(bdev->backing_bdev));
1904		/* clip at maximum size the meta device can support */
1905		s = min_t(sector_t, s,
1906			BM_EXT_TO_SECT(bdev->md.md_size_sect
1907				     - bdev->md.bm_offset));
1908		break;
1909	default:
1910		s = min_t(sector_t, DRBD_MAX_SECTORS,
1911				drbd_get_capacity(bdev->backing_bdev));
1912	}
1913	return s;
1914}
1915
1916/**
1917 * drbd_md_ss() - Return the sector number of our meta data super block
 
1918 * @bdev:	Meta data block device.
1919 */
1920static inline sector_t drbd_md_ss(struct drbd_backing_dev *bdev)
 
1921{
1922	const int meta_dev_idx = bdev->md.meta_dev_idx;
1923
1924	if (meta_dev_idx == DRBD_MD_INDEX_FLEX_EXT)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1925		return 0;
1926
1927	/* Since drbd08, internal meta data is always "flexible".
1928	 * position: last 4k aligned block of 4k size */
1929	if (meta_dev_idx == DRBD_MD_INDEX_INTERNAL ||
1930	    meta_dev_idx == DRBD_MD_INDEX_FLEX_INT)
1931		return (drbd_get_capacity(bdev->backing_bdev) & ~7ULL) - 8;
1932
1933	/* external, some index; this is the old fixed size layout */
1934	return MD_128MB_SECT * bdev->md.meta_dev_idx;
1935}
1936
1937static inline void
1938drbd_queue_work(struct drbd_work_queue *q, struct drbd_work *w)
1939{
1940	unsigned long flags;
1941	spin_lock_irqsave(&q->q_lock, flags);
1942	list_add_tail(&w->list, &q->q);
 
 
1943	spin_unlock_irqrestore(&q->q_lock, flags);
1944	wake_up(&q->q_wait);
1945}
1946
1947static inline void
1948drbd_queue_work_if_unqueued(struct drbd_work_queue *q, struct drbd_work *w)
1949{
1950	unsigned long flags;
1951	spin_lock_irqsave(&q->q_lock, flags);
1952	if (list_empty_careful(&w->list))
1953		list_add_tail(&w->list, &q->q);
 
1954	spin_unlock_irqrestore(&q->q_lock, flags);
1955	wake_up(&q->q_wait);
1956}
1957
1958static inline void
1959drbd_device_post_work(struct drbd_device *device, int work_bit)
 
 
 
 
 
 
 
 
 
 
 
 
1960{
1961	if (!test_and_set_bit(work_bit, &device->flags)) {
1962		struct drbd_connection *connection =
1963			first_peer_device(device)->connection;
1964		struct drbd_work_queue *q = &connection->sender_work;
1965		if (!test_and_set_bit(DEVICE_WORK_PENDING, &connection->flags))
1966			wake_up(&q->q_wait);
1967	}
1968}
1969
/* Only declared in this header; the definition lives elsewhere. */
void drbd_flush_workqueue(struct drbd_work_queue *work_queue);
 
 
 
 
1971
1972/* To get the ack_receiver out of the blocking network stack,
1973 * so it can change its sk_rcvtimeo from idle- to ping-timeout,
1974 * and send a ping, we need to send a signal.
1975 * Which signal we send is irrelevant. */
1976static inline void wake_ack_receiver(struct drbd_connection *connection)
1977{
1978	struct task_struct *task = connection->ack_receiver.task;
1979	if (task && get_t_state(&connection->ack_receiver) == RUNNING)
1980		force_sig(SIGXCPU, task);
1981}
1982
1983static inline void request_ping(struct drbd_connection *connection)
1984{
1985	set_bit(SEND_PING, &connection->flags);
1986	wake_ack_receiver(connection);
1987}
1988
/* command preparation and sending, per connection and per peer device */
void *conn_prepare_command(struct drbd_connection *, struct drbd_socket *);
void *drbd_prepare_command(struct drbd_peer_device *, struct drbd_socket *);
int conn_send_command(struct drbd_connection *, struct drbd_socket *,
		      enum drbd_packet, unsigned int, void *,
		      unsigned int);
int drbd_send_command(struct drbd_peer_device *, struct drbd_socket *,
		      enum drbd_packet, unsigned int, void *,
		      unsigned int);

/* pings */
int drbd_send_ping(struct drbd_connection *connection);
int drbd_send_ping_ack(struct drbd_connection *connection);

/* state change requests */
int drbd_send_state_req(struct drbd_peer_device *, union drbd_state,
			union drbd_state);
int conn_send_state_req(struct drbd_connection *, union drbd_state,
			union drbd_state);
2002
2003static inline void drbd_thread_stop(struct drbd_thread *thi)
2004{
2005	_drbd_thread_stop(thi, false, true);
2006}
2007
2008static inline void drbd_thread_stop_nowait(struct drbd_thread *thi)
2009{
2010	_drbd_thread_stop(thi, false, false);
2011}
2012
2013static inline void drbd_thread_restart_nowait(struct drbd_thread *thi)
2014{
2015	_drbd_thread_stop(thi, true, false);
2016}
2017
/* counts how many answer packets we expect from our peer,
2019 * for either explicit application requests,
2020 * or implicit barrier packets as necessary.
2021 * increased:
2022 *  w_send_barrier
2023 *  _req_mod(req, QUEUE_FOR_NET_WRITE or QUEUE_FOR_NET_READ);
2024 *    it is much easier and equally valid to count what we queue for the
2025 *    worker, even before it actually was queued or send.
2026 *    (drbd_make_request_common; recovery path on read io-error)
2027 * decreased:
2028 *  got_BarrierAck (respective tl_clear, tl_clear_barrier)
2029 *  _req_mod(req, DATA_RECEIVED)
2030 *     [from receive_DataReply]
2031 *  _req_mod(req, WRITE_ACKED_BY_PEER or RECV_ACKED_BY_PEER or NEG_ACKED)
2032 *     [from got_BlockAck (P_WRITE_ACK, P_RECV_ACK)]
2033 *     for some reason it is NOT decreased in got_NegAck,
2034 *     but in the resulting cleanup code from report_params.
2035 *     we should try to remember the reason for that...
2036 *  _req_mod(req, SEND_FAILED or SEND_CANCELED)
2037 *  _req_mod(req, CONNECTION_LOST_WHILE_PENDING)
2038 *     [from tl_clear_barrier]
2039 */
2040static inline void inc_ap_pending(struct drbd_device *device)
2041{
2042	atomic_inc(&device->ap_pending_cnt);
2043}
2044
/*
 * Log an error if the atomic counter device->which has dropped below zero.
 *
 * @which: name of an atomic counter member of the device
 * @func:  function name to report
 * @line:  line number to report
 *
 * Expects a variable named "device" in the scope of the caller.
 *
 * Wrapped in do { } while (0) so that the expansion is a single statement:
 * a bare "if" here would silently capture an "else" that follows the macro
 * at the call site.
 */
#define ERR_IF_CNT_IS_NEGATIVE(which, func, line)			\
	do {								\
		if (atomic_read(&device->which) < 0)			\
			drbd_err(device, "in %s:%d: " #which " = %d < 0 !\n", \
				func, line,				\
				atomic_read(&device->which));		\
	} while (0)
2050
2051#define dec_ap_pending(device) _dec_ap_pending(device, __func__, __LINE__)
2052static inline void _dec_ap_pending(struct drbd_device *device, const char *func, int line)
2053{
2054	if (atomic_dec_and_test(&device->ap_pending_cnt))
2055		wake_up(&device->misc_wait);
2056	ERR_IF_CNT_IS_NEGATIVE(ap_pending_cnt, func, line);
2057}
2058
2059/* counts how many resync-related answers we still expect from the peer
2060 *		     increase			decrease
2061 * C_SYNC_TARGET sends P_RS_DATA_REQUEST (and expects P_RS_DATA_REPLY)
2062 * C_SYNC_SOURCE sends P_RS_DATA_REPLY   (and expects P_WRITE_ACK with ID_SYNCER)
2063 *					   (or P_NEG_ACK with ID_SYNCER)
2064 */
2065static inline void inc_rs_pending(struct drbd_device *device)
2066{
2067	atomic_inc(&device->rs_pending_cnt);
2068}
2069
2070#define dec_rs_pending(device) _dec_rs_pending(device, __func__, __LINE__)
2071static inline void _dec_rs_pending(struct drbd_device *device, const char *func, int line)
2072{
2073	atomic_dec(&device->rs_pending_cnt);
2074	ERR_IF_CNT_IS_NEGATIVE(rs_pending_cnt, func, line);
2075}
2076
2077/* counts how many answers we still need to send to the peer.
2078 * increased on
2079 *  receive_Data	unless protocol A;
2080 *			we need to send a P_RECV_ACK (proto B)
2081 *			or P_WRITE_ACK (proto C)
2082 *  receive_RSDataReply (recv_resync_read) we need to send a P_WRITE_ACK
2083 *  receive_DataRequest (receive_RSDataRequest) we need to send back P_DATA
2084 *  receive_Barrier_*	we need to send a P_BARRIER_ACK
2085 */
2086static inline void inc_unacked(struct drbd_device *device)
2087{
2088	atomic_inc(&device->unacked_cnt);
2089}
2090
2091#define dec_unacked(device) _dec_unacked(device, __func__, __LINE__)
2092static inline void _dec_unacked(struct drbd_device *device, const char *func, int line)
2093{
2094	atomic_dec(&device->unacked_cnt);
2095	ERR_IF_CNT_IS_NEGATIVE(unacked_cnt, func, line);
2096}
 
 
 
2097
2098#define sub_unacked(device, n) _sub_unacked(device, n, __func__, __LINE__)
2099static inline void _sub_unacked(struct drbd_device *device, int n, const char *func, int line)
2100{
2101	atomic_sub(n, &device->unacked_cnt);
2102	ERR_IF_CNT_IS_NEGATIVE(unacked_cnt, func, line);
2103}
2104
2105static inline bool is_sync_target_state(enum drbd_conns connection_state)
2106{
2107	return	connection_state == C_SYNC_TARGET ||
2108		connection_state == C_PAUSED_SYNC_T;
2109}
2110
2111static inline bool is_sync_source_state(enum drbd_conns connection_state)
 
 
 
 
 
 
2112{
2113	return	connection_state == C_SYNC_SOURCE ||
2114		connection_state == C_PAUSED_SYNC_S;
2115}
2116
2117static inline bool is_sync_state(enum drbd_conns connection_state)
2118{
2119	return	is_sync_source_state(connection_state) ||
2120		is_sync_target_state(connection_state);
 
2121}
2122
/**
 * get_ldev_if_state() - Increase the ref count on device->ldev. Returns
 *			 false if no reference was taken.
 * @_device:		DRBD device.
 * @_min_state:		Minimum device state required for success.
 *
 * The reference itself is taken by _get_ldev_if_state(); on success the
 * macro additionally announces the acquisition via __acquire().
 * get_ldev() is the same with D_INCONSISTENT as the minimum state.
 *
 * You have to call put_ldev() when finished working with device->ldev.
 */
#define get_ldev_if_state(_device, _min_state)				\
	(_get_ldev_if_state((_device), (_min_state)) ?			\
	 ({ __acquire(x); true; }) : false)

#define get_ldev(_device)						\
	get_ldev_if_state(_device, D_INCONSISTENT)
2134
2135static inline void put_ldev(struct drbd_device *device)
2136{
2137	enum drbd_disk_state disk_state = device->state.disk;
2138	/* We must check the state *before* the atomic_dec becomes visible,
2139	 * or we have a theoretical race where someone hitting zero,
2140	 * while state still D_FAILED, will then see D_DISKLESS in the
2141	 * condition below and calling into destroy, where he must not, yet. */
2142	int i = atomic_dec_return(&device->local_cnt);
2143
2144	/* This may be called from some endio handler,
2145	 * so we must not sleep here. */
2146
2147	__release(local);
2148	D_ASSERT(device, i >= 0);
2149	if (i == 0) {
2150		if (disk_state == D_DISKLESS)
2151			/* even internal references gone, safe to destroy */
2152			drbd_device_post_work(device, DESTROY_DISK);
2153		if (disk_state == D_FAILED)
2154			/* all application IO references gone. */
2155			if (!test_and_set_bit(GOING_DISKLESS, &device->flags))
2156				drbd_device_post_work(device, GO_DISKLESS);
2157		wake_up(&device->misc_wait);
2158	}
2159}
2160
2161#ifndef __CHECKER__
2162static inline int _get_ldev_if_state(struct drbd_device *device, enum drbd_disk_state mins)
2163{
2164	int io_allowed;
2165
2166	/* never get a reference while D_DISKLESS */
2167	if (device->state.disk == D_DISKLESS)
2168		return 0;
2169
2170	atomic_inc(&device->local_cnt);
2171	io_allowed = (device->state.disk >= mins);
2172	if (!io_allowed)
2173		put_ldev(device);
2174	return io_allowed;
2175}
2176#else
2177extern int _get_ldev_if_state(struct drbd_device *device, enum drbd_disk_state mins);
2178#endif
2179
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2180/* this throttles on-the-fly application requests
2181 * according to max_buffers settings;
2182 * maybe re-implement using semaphores? */
2183static inline int drbd_get_max_buffers(struct drbd_device *device)
2184{
2185	struct net_conf *nc;
2186	int mxb;
2187
2188	rcu_read_lock();
2189	nc = rcu_dereference(first_peer_device(device)->connection->net_conf);
2190	mxb = nc ? nc->max_buffers : 1000000;  /* arbitrary limit on open requests */
2191	rcu_read_unlock();
2192
2193	return mxb;
2194}
2195
2196static inline int drbd_state_is_stable(struct drbd_device *device)
2197{
2198	union drbd_dev_state s = device->state;
2199
2200	/* DO NOT add a default clause, we want the compiler to warn us
2201	 * for any newly introduced state we may have forgotten to add here */
2202
2203	switch ((enum drbd_conns)s.conn) {
2204	/* new io only accepted when there is no connection, ... */
2205	case C_STANDALONE:
2206	case C_WF_CONNECTION:
2207	/* ... or there is a well established connection. */
2208	case C_CONNECTED:
2209	case C_SYNC_SOURCE:
2210	case C_SYNC_TARGET:
2211	case C_VERIFY_S:
2212	case C_VERIFY_T:
2213	case C_PAUSED_SYNC_S:
2214	case C_PAUSED_SYNC_T:
2215	case C_AHEAD:
2216	case C_BEHIND:
2217		/* transitional states, IO allowed */
2218	case C_DISCONNECTING:
2219	case C_UNCONNECTED:
2220	case C_TIMEOUT:
2221	case C_BROKEN_PIPE:
2222	case C_NETWORK_FAILURE:
2223	case C_PROTOCOL_ERROR:
2224	case C_TEAR_DOWN:
2225	case C_WF_REPORT_PARAMS:
2226	case C_STARTING_SYNC_S:
2227	case C_STARTING_SYNC_T:
2228		break;
2229
2230		/* Allow IO in BM exchange states with new protocols */
2231	case C_WF_BITMAP_S:
2232		if (first_peer_device(device)->connection->agreed_pro_version < 96)
2233			return 0;
2234		break;
2235
2236		/* no new io accepted in these states */
2237	case C_WF_BITMAP_T:
2238	case C_WF_SYNC_UUID:
2239	case C_MASK:
2240		/* not "stable" */
2241		return 0;
2242	}
2243
2244	switch ((enum drbd_disk_state)s.disk) {
2245	case D_DISKLESS:
2246	case D_INCONSISTENT:
2247	case D_OUTDATED:
2248	case D_CONSISTENT:
2249	case D_UP_TO_DATE:
2250	case D_FAILED:
2251		/* disk state is stable as well. */
2252		break;
2253
2254	/* no new io accepted during transitional states */
2255	case D_ATTACHING:
 
2256	case D_NEGOTIATING:
2257	case D_UNKNOWN:
2258	case D_MASK:
2259		/* not "stable" */
2260		return 0;
2261	}
2262
2263	return 1;
2264}
2265
2266static inline int drbd_suspended(struct drbd_device *device)
2267{
2268	struct drbd_resource *resource = device->resource;
2269
2270	return resource->susp || resource->susp_fen || resource->susp_nod;
2271}
2272
2273static inline bool may_inc_ap_bio(struct drbd_device *device)
2274{
2275	int mxb = drbd_get_max_buffers(device);
2276
2277	if (drbd_suspended(device))
2278		return false;
2279	if (atomic_read(&device->suspend_cnt))
2280		return false;
2281
2282	/* to avoid potential deadlock or bitmap corruption,
2283	 * in various places, we only allow new application io
2284	 * to start during "stable" states. */
2285
2286	/* no new io accepted when attaching or detaching the disk */
2287	if (!drbd_state_is_stable(device))
2288		return false;
2289
2290	/* since some older kernels don't have atomic_add_unless,
2291	 * and we are within the spinlock anyways, we have this workaround.  */
2292	if (atomic_read(&device->ap_bio_cnt) > mxb)
2293		return false;
2294	if (test_bit(BITMAP_IO, &device->flags))
2295		return false;
2296	return true;
2297}
2298
2299static inline bool inc_ap_bio_cond(struct drbd_device *device)
2300{
2301	bool rv = false;
2302
2303	spin_lock_irq(&device->resource->req_lock);
2304	rv = may_inc_ap_bio(device);
2305	if (rv)
2306		atomic_inc(&device->ap_bio_cnt);
2307	spin_unlock_irq(&device->resource->req_lock);
2308
2309	return rv;
2310}
2311
2312static inline void inc_ap_bio(struct drbd_device *device)
2313{
2314	/* we wait here
2315	 *    as long as the device is suspended
2316	 *    until the bitmap is no longer on the fly during connection
2317	 *    handshake as long as we would exceed the max_buffer limit.
2318	 *
2319	 * to avoid races with the reconnect code,
2320	 * we need to atomic_inc within the spinlock. */
2321
2322	wait_event(device->misc_wait, inc_ap_bio_cond(device));
2323}
2324
2325static inline void dec_ap_bio(struct drbd_device *device)
2326{
2327	int mxb = drbd_get_max_buffers(device);
2328	int ap_bio = atomic_dec_return(&device->ap_bio_cnt);
2329
2330	D_ASSERT(device, ap_bio >= 0);
2331
2332	if (ap_bio == 0 && test_bit(BITMAP_IO, &device->flags)) {
2333		if (!test_and_set_bit(BITMAP_IO_QUEUED, &device->flags))
2334			drbd_queue_work(&first_peer_device(device)->
2335				connection->sender_work,
2336				&device->bm_io_work.w);
2337	}
2338
 
2339	/* this currently does wake_up for every dec_ap_bio!
2340	 * maybe rather introduce some type of hysteresis?
2341	 * e.g. (ap_bio == mxb/2 || ap_bio == 0) ? */
2342	if (ap_bio < mxb)
2343		wake_up(&device->misc_wait);
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2344}
 
 
 
 
 
 
2345
2346static inline bool verify_can_do_stop_sector(struct drbd_device *device)
2347{
2348	return first_peer_device(device)->connection->agreed_pro_version >= 97 &&
2349		first_peer_device(device)->connection->agreed_pro_version != 100;
 
 
 
 
 
2350}
2351
2352static inline int drbd_set_ed_uuid(struct drbd_device *device, u64 val)
2353{
2354	int changed = device->ed_uuid != val;
2355	device->ed_uuid = val;
2356	return changed;
2357}
2358
/*
 * Always returns QUEUE_ORDERED_NONE (defined as 0 here if nothing else
 * defines it): sorry, we currently have no working implementation of
 * distributed TCQ stuff.  @device is not looked at.
 */
static inline int drbd_queue_order_type(struct drbd_device *device)
{
#ifndef QUEUE_ORDERED_NONE
#define QUEUE_ORDERED_NONE 0
#endif
	const int order_type = QUEUE_ORDERED_NONE;

	return order_type;
}
2368
2369static inline struct drbd_connection *first_connection(struct drbd_resource *resource)
2370{
2371	return list_first_entry_or_null(&resource->connections,
2372				struct drbd_connection, connections);
 
 
 
 
 
 
 
 
2373}
2374
2375#endif