[linux.git] / drivers / md / raid1.h

#ifndef _RAID1_H
#define _RAID1_H

/*
 * Per-mirror bookkeeping: r1conf->mirrors points at an array of these.
 */
struct raid1_info {
	/* the md_rdev this entry refers to */
	struct md_rdev	*rdev;
	/* NOTE(review): presumably the sector at which the most recent
	 * request to this device ended - confirm against raid1.c.
	 */
	sector_t	head_position;

	/* When choosing the best device for a read (read_balance())
	 * we try to keep sequential reads on the same device.
	 * NOTE(review): next_seq_sect / seq_start presumably record where
	 * the next sequential read is expected and where the current
	 * sequential run began - confirm against read_balance().
	 */
	sector_t	next_seq_sect;
	sector_t	seq_start;
};

/*
 * Memory pools need a pointer to the mddev, so they can force an unplug
 * when memory is tight, and a count of the number of drives that the
 * pool was allocated for, so they know how much to allocate and free.
 * mddev->raid_disks cannot be used, as it can change while a pool is active.
 * These two pieces of data are stored in a kmalloced struct.
 * The 'raid_disks' here is twice the raid_disks in r1conf.
 * This leaves room for each 'real' device to have a replacement in the
 * second half of the array.
 */

struct pool_info {
	/* lets the pool force an unplug when memory is tight */
	struct mddev *mddev;
	/* number of drives the pool was allocated for: twice r1conf->raid_disks */
	int	raid_disks;
};

/*
 * raid1 configuration and run-time state for an array: the mirror table,
 * resync-window bookkeeping, barrier counters and memory pools.
 */
struct r1conf {
	struct mddev		*mddev;
	struct raid1_info	*mirrors;	/* twice 'raid_disks' entries, to
						 * allow for replacements.
						 */
	int			raid_disks;

	/* During resync, read balancing is only allowed on the part
	 * of the array that has been resynced.  'next_resync' tells us
	 * where that is.
	 */
	sector_t		next_resync;

	/* When raid1 starts a resync, the array is divided into four
	 * partitions:
	 *
	 * |---------|--------------|---------------------|-------------|
	 *        next_resync   start_next_window       end_window
	 *
	 * start_next_window = next_resync + NEXT_NORMALIO_DISTANCE
	 * end_window = start_next_window + NEXT_NORMALIO_DISTANCE
	 * current_window_requests is the count of normal IO between
	 *   start_next_window and end_window.
	 * next_window_requests is the count of normal IO after end_window.
	 */
	sector_t		start_next_window;
	int			current_window_requests;
	int			next_window_requests;

	spinlock_t		device_lock;

	/* List of 'struct r1bio' that need to be processed by raid1d,
	 * whether to retry a read, write out a resync or recovery
	 * block, or anything else.
	 */
	struct list_head	retry_list;

	/* Pending writes, queued here to be submitted on unplug.
	 * NOTE(review): pending_count presumably tracks how many bios are
	 * on pending_bio_list - confirm against raid1.c.
	 */
	struct bio_list		pending_bio_list;
	int			pending_count;

	/* For use when syncing mirrors:
	 * We don't allow both normal IO and resync/recovery IO at
	 * the same time - resync/recovery can only happen when there
	 * is no other IO.  So when either is active, the other has to wait.
	 * See the more detailed description in raid1.c near raise_barrier().
	 * NOTE(review): the counters below are presumably protected by
	 * resync_lock - confirm against raise_barrier().
	 */
	wait_queue_head_t	wait_barrier;
	spinlock_t		resync_lock;
	int			nr_pending;
	int			nr_waiting;
	int			nr_queued;
	int			barrier;
	int			array_frozen;

	/* Set to 1 if a full sync is needed (fresh device added).
	 * Cleared when a sync completes.
	 */
	int			fullsync;

	/* When this is the same as mddev->recovery_disabled we don't allow
	 * recovery to be attempted, as we expect a read error.
	 */
	int			recovery_disabled;


	/* poolinfo contains information about the content of the
	 * mempools - it changes when the array grows or shrinks.
	 */
	struct pool_info	*poolinfo;
	mempool_t		*r1bio_pool;
	mempool_t		*r1buf_pool;

	/* Temporary buffer for synchronous IO when attempting to repair
	 * a read error.
	 */
	struct page		*tmppage;


	/* When taking over an array from a different personality, we store
	 * the new thread here until we fully activate the array.
	 */
	struct md_thread	*thread;
};

/*
 * this is our 'private' RAID1 bio.
 *
 * it contains information about what kind of IO operations were started
 * for this RAID1 operation, and about their status:
 */

struct r1bio {
	atomic_t		remaining; /* 'have we finished' count,
					    * used from IRQ handlers
					    */
	atomic_t		behind_remaining; /* number of write-behind ios remaining
						 * in this BehindIO request
						 */
	/* NOTE(review): sector/sectors presumably give the start and length
	 * of this request on the array - confirm against raid1.c.
	 */
	sector_t		sector;
	sector_t		start_next_window;
	int			sectors;
	/* bit flags; the bit numbers are the R1BIO_* defines in this header */
	unsigned long		state;
	struct mddev		*mddev;
	/*
	 * original bio going to /dev/mdx
	 */
	struct bio		*master_bio;
	/*
	 * if the IO is in READ direction, then this is where we read
	 */
	int			read_disk;

	/* NOTE(review): presumably links this r1bio onto r1conf->retry_list
	 * for processing by raid1d - confirm against raid1.c.
	 */
	struct list_head	retry_list;
	/* Next two are only valid when R1BIO_BehindIO is set */
	struct bio_vec		*behind_bvecs;
	int			behind_page_count;
	/*
	 * if the IO is in WRITE direction, then multiple bios are used.
	 * We choose the number when they are allocated.
	 *
	 * Declared as a C99 flexible array member rather than a GNU
	 * zero-length array; it contributes nothing to sizeof(struct r1bio)
	 * and must remain the last member.
	 */
	struct bio		*bios[];
	/* DO NOT PUT ANY NEW FIELDS HERE - bios array is contiguously alloced */
};

/*
 * Bit numbers used in r1bio.state.
 * Bit 5 is not assigned in this header.
 */
#define R1BIO_Uptodate		0
#define R1BIO_IsSync		1
#define R1BIO_Degraded		2
#define R1BIO_BehindIO		3

/*
 * ReadError is set on bios that hit a read error, so that raid1d
 * knows what to do with them.
 */
#define R1BIO_ReadError		4

/*
 * Returned records that bi_end_io has already been called.
 * For write-behind requests bi_end_io is called:
 *  - when the last non-write-behind device completes, provided any
 *    write was successful; otherwise
 *  - when any write-behind write succeeds; otherwise
 *  - with failure, when the last write completes (and all failed).
 */
#define R1BIO_Returned		6

/*
 * MadeGood is set when a write for this request means some
 * known-bad-block records can be cleared.
 */
#define R1BIO_MadeGood		7
#define R1BIO_WriteError	8

/*
 * Declared here for users of this header; the definition is not visible
 * in this file.
 * NOTE(review): presumably reports whether the raid1 array behind 'mddev'
 * is congested with respect to 'bits' - confirm against the definition.
 */
extern int md_raid1_congested(struct mddev *mddev, int bits);

#endif
Commit	Line	Data
1da177e4 LT	1	#ifndef _RAID1_H
	2	#define _RAID1_H
	3
0eaf822c	4	struct raid1_info {
3cb03002	5	struct md_rdev *rdev;
1da177e4	6	sector_t head_position;
be4d3280 SL	7
	8	/* When choose the best device for a read (read_balance())
	9	* we try to keep sequential reads one the same device
	10	*/
	11	sector_t next_seq_sect;
12cee5a8	12	sector_t seq_start;
1da177e4 LT	13	};
	14
	15	/*
	16	* memory pools need a pointer to the mddev, so they can force an unplug
	17	* when memory is tight, and a count of the number of drives that the
	18	* pool was allocated for, so they know how much to allocate and free.
	19	* mddev->raid_disks cannot be used, as it can change while a pool is active
	20	* These two datums are stored in a kmalloced struct.
8f19ccb2 N	21	* The 'raid_disks' here is twice the raid_disks in r1conf.
	22	* This allows space for each 'real' device can have a replacement in the
	23	* second half of the array.
1da177e4 LT	24	*/
	25
	26	struct pool_info {
fd01b88c	27	struct mddev *mddev;
1da177e4 LT	28	int raid_disks;
	29	};
	30
e8096360	31	struct r1conf {
fd01b88c	32	struct mddev *mddev;
0eaf822c	33	struct raid1_info *mirrors; /* twice 'raid_disks' to
8f19ccb2 N	34	* allow for replacements.
8f19ccb2 N	35	*/
1da177e4	36	int raid_disks;
ce550c20	37
ce550c20 N	38	/* During resync, read_balancing is only allowed on the part
	39	* of the array that has been resynced. 'next_resync' tells us
	40	* where that is.
	41	*/
	42	sector_t next_resync;
	43
79ef3a8a	44	/* When raid1 starts resync, we divide array into four partitions
	45	* |---------|--------------|---------------------|-------------|
	46	*        next_resync   start_next_window       end_window
	47	* start_next_window = next_resync + NEXT_NORMALIO_DISTANCE
	48	* end_window = start_next_window + NEXT_NORMALIO_DISTANCE
	49	* current_window_requests means the count of normalIO between
	50	* start_next_window and end_window.
	51	* next_window_requests means the count of normalIO after end_window.
	52	* */
	53	sector_t start_next_window;
	54	int current_window_requests;
	55	int next_window_requests;
	56
1da177e4 LT	57	spinlock_t device_lock;
1da177e4 LT	58
9f2c9d12 N	59	/* list of 'struct r1bio' that need to be processed by raid1d,
	60	* whether to retry a read, writeout a resync or recovery
	61	* block, or anything else.
ce550c20	62	*/
1da177e4	63	struct list_head retry_list;
191ea9b2	64
ce550c20 N	65	/* queue pending writes to be submitted on unplug */
ce550c20 N	66	struct bio_list pending_bio_list;
34db0cd6	67	int pending_count;
1da177e4	68
ce550c20 N	69	/* for use when syncing mirrors:
	70	* We don't allow both normal IO and resync/recovery IO at
	71	* the same time - resync/recovery can only happen when there
	72	* is no other IO. So when either is active, the other has to wait.
	73	* See more details description in raid1.c near raise_barrier().
	74	*/
	75	wait_queue_head_t wait_barrier;
1da177e4	76	spinlock_t resync_lock;
191ea9b2	77	int nr_pending;
17999be4	78	int nr_waiting;
ddaf22ab	79	int nr_queued;
191ea9b2	80	int barrier;
b364e3d0	81	int array_frozen;
1da177e4	82
ce550c20 N	83	/* Set to 1 if a full sync is needed, (fresh device added).
	84	* Cleared when a sync completes.
	85	*/
	86	int fullsync;
1da177e4	87
ce550c20 N	88	/* When the same as mddev->recovery_disabled we don't allow
	89	* recovery to be attempted as we expect a read error.
	90	*/
	91	int recovery_disabled;
1da177e4	92
ddaf22ab	93
ce550c20 N	94	/* poolinfo contains information about the content of the
	95	* mempools - it changes when the array grows or shrinks
	96	*/
	97	struct pool_info *poolinfo;
9f2c9d12 N	98	mempool_t *r1bio_pool;
9f2c9d12 N	99	mempool_t *r1buf_pool;
709ae487	100
ce550c20 N	101	/* temporary buffer to synchronous IO when attempting to repair
	102	* a read error.
	103	*/
	104	struct page *tmppage;
	105
	106
709ae487 N	107	/* When taking over an array from a different personality, we store
	108	* the new thread here until we fully activate the array.
	109	*/
2b8bf345	110	struct md_thread *thread;
1da177e4 LT	111	};
1da177e4 LT	112
1da177e4 LT	113	/*
	114	* this is our 'private' RAID1 bio.
	115	*
	116	* it contains information about what kind of IO operations were started
	117	* for this RAID1 operation, and about their status:
	118	*/
	119
9f2c9d12	120	struct r1bio {
1da177e4 LT	121	atomic_t remaining; /* 'have we finished' count,
	122	* used from IRQ handlers
	123	*/
4b6d287f N	124	atomic_t behind_remaining; /* number of write-behind ios remaining
	125	* in this BehindIO request
	126	*/
1da177e4	127	sector_t sector;
79ef3a8a	128	sector_t start_next_window;
1da177e4 LT	129	int sectors;
1da177e4 LT	130	unsigned long state;
fd01b88c	131	struct mddev *mddev;
1da177e4 LT	132	/*
	133	* original bio going to /dev/mdx
	134	*/
	135	struct bio *master_bio;
	136	/*
	137	* if the IO is in READ direction, then this is where we read
	138	*/
	139	int read_disk;
	140
	141	struct list_head retry_list;
af6d7b76	142	/* Next two are only valid when R1BIO_BehindIO is set */
2ca68f5e	143	struct bio_vec *behind_bvecs;
af6d7b76	144	int behind_page_count;
1da177e4 LT	145	/*
	146	* if the IO is in WRITE direction, then multiple bios are used.
	147	* We choose the number when they are allocated.
	148	*/
	149	struct bio *bios[0];
191ea9b2	150	/* DO NOT PUT ANY NEW FIELDS HERE - bios array is contiguously alloced*/
1da177e4 LT	151	};
	152
	153	/* bits for r1bio.state */
	154	#define R1BIO_Uptodate 0
	155	#define R1BIO_IsSync 1
191ea9b2	156	#define R1BIO_Degraded 2
a9701a30	157	#define R1BIO_BehindIO 3
d2eb35ac N	158	/* Set ReadError on bios that experience a readerror so that
	159	* raid1d knows what to do with them.
	160	*/
	161	#define R1BIO_ReadError 4
4b6d287f N	162	/* For write-behind requests, we call bi_end_io when
	163	* the last non-write-behind device completes, providing
	164	* any write was successful. Otherwise we call when
	165	* any write-behind write succeeds, otherwise we call
	166	* with failure when last write completes (and all failed).
	167	* Record that bi_end_io was called with this flag...
	168	*/
9e71f9c8	169	#define R1BIO_Returned 6
4367af55 N	170	/* If a write for this request means we can clear some
	171	* known-bad-block records, we set this flag
	172	*/
cd5ff9a1 N	173	#define R1BIO_MadeGood 7
cd5ff9a1 N	174	#define R1BIO_WriteError 8
4b6d287f	175
fd01b88c	176	extern int md_raid1_congested(struct mddev *mddev, int bits);
1ed7242e	177
1da177e4	178	#endif