Loading...
1/* SPDX-License-Identifier: GPL-2.0 */
2/*
3 * Copyright (C) 2012 Fusion-io All rights reserved.
4 * Copyright (C) 2012 Intel Corp. All rights reserved.
5 */
6
7#ifndef BTRFS_RAID56_H
8#define BTRFS_RAID56_H
9
10#include <linux/workqueue.h>
11#include "volumes.h"
12
/*
 * The kind of operation a struct btrfs_raid_bio carries out; stored in
 * btrfs_raid_bio::operation.
 */
enum btrfs_rbio_ops {
	BTRFS_RBIO_WRITE,
	BTRFS_RBIO_READ_REBUILD,
	BTRFS_RBIO_PARITY_SCRUB,
};
18
19struct btrfs_raid_bio {
20 struct btrfs_io_context *bioc;
21
22 /*
23 * While we're doing RMW on a stripe we put it into a hash table so we
24 * can lock the stripe and merge more rbios into it.
25 */
26 struct list_head hash_list;
27
28 /* LRU list for the stripe cache */
29 struct list_head stripe_cache;
30
31 /* For scheduling work in the helper threads */
32 struct work_struct work;
33
34 /*
35 * bio_list and bio_list_lock are used to add more bios into the stripe
36 * in hopes of avoiding the full RMW
37 */
38 struct bio_list bio_list;
39 spinlock_t bio_list_lock;
40
41 /*
42 * Also protected by the bio_list_lock, the plug list is used by the
43 * plugging code to collect partial bios while plugged. The stripe
44 * locking code also uses it to hand off the stripe lock to the next
45 * pending IO.
46 */
47 struct list_head plug_list;
48
49 /* Flags that tell us if it is safe to merge with this bio. */
50 unsigned long flags;
51
52 /*
53 * Set if we're doing a parity rebuild for a read from higher up, which
54 * is handled differently from a parity rebuild as part of RMW.
55 */
56 enum btrfs_rbio_ops operation;
57
58 /* How many pages there are for the full stripe including P/Q */
59 u16 nr_pages;
60
61 /* How many sectors there are for the full stripe including P/Q */
62 u16 nr_sectors;
63
64 /* Number of data stripes (no p/q) */
65 u8 nr_data;
66
67 /* Number of all stripes (including P/Q) */
68 u8 real_stripes;
69
70 /* How many pages there are for each stripe */
71 u8 stripe_npages;
72
73 /* How many sectors there are for each stripe */
74 u8 stripe_nsectors;
75
76 /* Stripe number that we're scrubbing */
77 u8 scrubp;
78
79 /*
80 * Size of all the bios in the bio_list. This helps us decide if the
81 * rbio maps to a full stripe or not.
82 */
83 int bio_list_bytes;
84
85 refcount_t refs;
86
87 atomic_t stripes_pending;
88
89 wait_queue_head_t io_wait;
90
91 /* Bitmap to record which horizontal stripe has data */
92 unsigned long dbitmap;
93
94 /* Allocated with stripe_nsectors-many bits for finish_*() calls */
95 unsigned long finish_pbitmap;
96
97 /*
98 * These are two arrays of pointers. We allocate the rbio big enough
99 * to hold them both and setup their locations when the rbio is
100 * allocated.
101 */
102
103 /*
104 * Pointers to pages that we allocated for reading/writing stripes
105 * directly from the disk (including P/Q).
106 */
107 struct page **stripe_pages;
108
109 /* Pointers to the sectors in the bio_list, for faster lookup */
110 struct sector_ptr *bio_sectors;
111
112 /*
113 * For subpage support, we need to map each sector to above
114 * stripe_pages.
115 */
116 struct sector_ptr *stripe_sectors;
117
118 /* Allocated with real_stripes-many pointers for finish_*() calls */
119 void **finish_pointers;
120
121 /*
122 * The bitmap recording where IO errors happened.
123 * Each bit is corresponding to one sector in either bio_sectors[] or
124 * stripe_sectors[] array.
125 *
126 * The reason we don't use another bit in sector_ptr is, we have two
127 * arrays of sectors, and a lot of IO can use sectors in both arrays.
128 * Thus making it much harder to iterate.
129 */
130 unsigned long *error_bitmap;
131
132 /*
133 * Checksum buffer if the rbio is for data. The buffer should cover
134 * all data sectors (excluding P/Q sectors).
135 */
136 u8 *csum_buf;
137
138 /*
139 * Each bit represents if the corresponding sector has data csum found.
140 * Should only cover data sectors (excluding P/Q sectors).
141 */
142 unsigned long *csum_bitmap;
143};
144
145/*
146 * For trace event usage only. Records useful debug info for each bio submitted
147 * by RAID56 to each physical device.
148 *
149 * No matter signed or not, (-1) is always the one indicating we can not grab
150 * the proper stripe number.
151 */
152struct raid56_bio_trace_info {
153 u64 devid;
154
155 /* The offset inside the stripe. (<= STRIPE_LEN) */
156 u32 offset;
157
158 /*
159 * Stripe number.
160 * 0 is the first data stripe, and nr_data for P stripe,
161 * nr_data + 1 for Q stripe.
162 * >= real_stripes for
163 */
164 u8 stripe_nr;
165};
166
167static inline int nr_data_stripes(const struct btrfs_chunk_map *map)
168{
169 return map->num_stripes - btrfs_nr_parity_stripes(map->type);
170}
171
172static inline int nr_bioc_data_stripes(const struct btrfs_io_context *bioc)
173{
174 return bioc->num_stripes - btrfs_nr_parity_stripes(bioc->map_type);
175}
176
/*
 * Sentinel stripe values for the P and Q parity stripes.  They are the two
 * largest values a u64 can hold.
 */
#define RAID5_P_STRIPE ((u64)-2)
#define RAID6_Q_STRIPE ((u64)-1)

/*
 * Non-zero if @x is RAID5_P_STRIPE or RAID6_Q_STRIPE.
 *
 * Since the two sentinels are the two largest u64 values, one range
 * comparison is equivalent to comparing against each of them, and it
 * evaluates @x only once, so arguments with side effects are safe.
 */
#define is_parity_stripe(x) ((u64)(x) >= RAID5_P_STRIPE)
182
183struct btrfs_device;
184
185void raid56_parity_recover(struct bio *bio, struct btrfs_io_context *bioc,
186 int mirror_num);
187void raid56_parity_write(struct bio *bio, struct btrfs_io_context *bioc);
188
189struct btrfs_raid_bio *raid56_parity_alloc_scrub_rbio(struct bio *bio,
190 struct btrfs_io_context *bioc,
191 struct btrfs_device *scrub_dev,
192 unsigned long *dbitmap, int stripe_nsectors);
193void raid56_parity_submit_scrub_rbio(struct btrfs_raid_bio *rbio);
194
195void raid56_parity_cache_data_pages(struct btrfs_raid_bio *rbio,
196 struct page **data_pages, u64 data_logical);
197
198int btrfs_alloc_stripe_hash_table(struct btrfs_fs_info *info);
199void btrfs_free_stripe_hash_table(struct btrfs_fs_info *info);
200
201#endif
1/* SPDX-License-Identifier: GPL-2.0 */
2/*
3 * Copyright (C) 2012 Fusion-io All rights reserved.
4 * Copyright (C) 2012 Intel Corp. All rights reserved.
5 */
6
7#ifndef BTRFS_RAID56_H
8#define BTRFS_RAID56_H
9
10#include <linux/types.h>
11#include <linux/list.h>
12#include <linux/spinlock.h>
13#include <linux/bio.h>
14#include <linux/refcount.h>
15#include <linux/workqueue.h>
16#include "volumes.h"
17
18struct page;
19struct sector_ptr;
20struct btrfs_fs_info;
21
/*
 * The kind of operation a struct btrfs_raid_bio carries out; stored in
 * btrfs_raid_bio::operation.
 */
enum btrfs_rbio_ops {
	BTRFS_RBIO_WRITE,
	BTRFS_RBIO_READ_REBUILD,
	BTRFS_RBIO_PARITY_SCRUB,
};
27
28struct btrfs_raid_bio {
29 struct btrfs_io_context *bioc;
30
31 /*
32 * While we're doing RMW on a stripe we put it into a hash table so we
33 * can lock the stripe and merge more rbios into it.
34 */
35 struct list_head hash_list;
36
37 /* LRU list for the stripe cache */
38 struct list_head stripe_cache;
39
40 /* For scheduling work in the helper threads */
41 struct work_struct work;
42
43 /*
44 * bio_list and bio_list_lock are used to add more bios into the stripe
45 * in hopes of avoiding the full RMW
46 */
47 struct bio_list bio_list;
48 spinlock_t bio_list_lock;
49
50 /*
51 * Also protected by the bio_list_lock, the plug list is used by the
52 * plugging code to collect partial bios while plugged. The stripe
53 * locking code also uses it to hand off the stripe lock to the next
54 * pending IO.
55 */
56 struct list_head plug_list;
57
58 /* Flags that tell us if it is safe to merge with this bio. */
59 unsigned long flags;
60
61 /*
62 * Set if we're doing a parity rebuild for a read from higher up, which
63 * is handled differently from a parity rebuild as part of RMW.
64 */
65 enum btrfs_rbio_ops operation;
66
67 /* How many pages there are for the full stripe including P/Q */
68 u16 nr_pages;
69
70 /* How many sectors there are for the full stripe including P/Q */
71 u16 nr_sectors;
72
73 /* Number of data stripes (no p/q) */
74 u8 nr_data;
75
76 /* Number of all stripes (including P/Q) */
77 u8 real_stripes;
78
79 /* How many pages there are for each stripe */
80 u8 stripe_npages;
81
82 /* How many sectors there are for each stripe */
83 u8 stripe_nsectors;
84
85 /* Stripe number that we're scrubbing */
86 u8 scrubp;
87
88 /*
89 * Size of all the bios in the bio_list. This helps us decide if the
90 * rbio maps to a full stripe or not.
91 */
92 int bio_list_bytes;
93
94 refcount_t refs;
95
96 atomic_t stripes_pending;
97
98 wait_queue_head_t io_wait;
99
100 /* Bitmap to record which horizontal stripe has data */
101 unsigned long dbitmap;
102
103 /* Allocated with stripe_nsectors-many bits for finish_*() calls */
104 unsigned long finish_pbitmap;
105
106 /*
107 * These are two arrays of pointers. We allocate the rbio big enough
108 * to hold them both and setup their locations when the rbio is
109 * allocated.
110 */
111
112 /*
113 * Pointers to pages that we allocated for reading/writing stripes
114 * directly from the disk (including P/Q).
115 */
116 struct page **stripe_pages;
117
118 /* Pointers to the sectors in the bio_list, for faster lookup */
119 struct sector_ptr *bio_sectors;
120
121 /*
122 * For subpage support, we need to map each sector to above
123 * stripe_pages.
124 */
125 struct sector_ptr *stripe_sectors;
126
127 /* Allocated with real_stripes-many pointers for finish_*() calls */
128 void **finish_pointers;
129
130 /*
131 * The bitmap recording where IO errors happened.
132 * Each bit is corresponding to one sector in either bio_sectors[] or
133 * stripe_sectors[] array.
134 *
135 * The reason we don't use another bit in sector_ptr is, we have two
136 * arrays of sectors, and a lot of IO can use sectors in both arrays.
137 * Thus making it much harder to iterate.
138 */
139 unsigned long *error_bitmap;
140
141 /*
142 * Checksum buffer if the rbio is for data. The buffer should cover
143 * all data sectors (excluding P/Q sectors).
144 */
145 u8 *csum_buf;
146
147 /*
148 * Each bit represents if the corresponding sector has data csum found.
149 * Should only cover data sectors (excluding P/Q sectors).
150 */
151 unsigned long *csum_bitmap;
152};
153
154/*
155 * For trace event usage only. Records useful debug info for each bio submitted
156 * by RAID56 to each physical device.
157 *
158 * No matter signed or not, (-1) is always the one indicating we can not grab
159 * the proper stripe number.
160 */
161struct raid56_bio_trace_info {
162 u64 devid;
163
164 /* The offset inside the stripe. (<= STRIPE_LEN) */
165 u32 offset;
166
167 /*
168 * Stripe number.
169 * 0 is the first data stripe, and nr_data for P stripe,
170 * nr_data + 1 for Q stripe.
171 * >= real_stripes for
172 */
173 u8 stripe_nr;
174};
175
176static inline int nr_data_stripes(const struct btrfs_chunk_map *map)
177{
178 return map->num_stripes - btrfs_nr_parity_stripes(map->type);
179}
180
181static inline int nr_bioc_data_stripes(const struct btrfs_io_context *bioc)
182{
183 return bioc->num_stripes - btrfs_nr_parity_stripes(bioc->map_type);
184}
185
/*
 * Sentinel stripe values for the P and Q parity stripes.  They are the two
 * largest values a u64 can hold.
 */
#define RAID5_P_STRIPE ((u64)-2)
#define RAID6_Q_STRIPE ((u64)-1)

/*
 * Non-zero if @x is RAID5_P_STRIPE or RAID6_Q_STRIPE.
 *
 * Since the two sentinels are the two largest u64 values, one range
 * comparison is equivalent to comparing against each of them, and it
 * evaluates @x only once, so arguments with side effects are safe.
 */
#define is_parity_stripe(x) ((u64)(x) >= RAID5_P_STRIPE)
191
192struct btrfs_device;
193
194void raid56_parity_recover(struct bio *bio, struct btrfs_io_context *bioc,
195 int mirror_num);
196void raid56_parity_write(struct bio *bio, struct btrfs_io_context *bioc);
197
198struct btrfs_raid_bio *raid56_parity_alloc_scrub_rbio(struct bio *bio,
199 struct btrfs_io_context *bioc,
200 struct btrfs_device *scrub_dev,
201 unsigned long *dbitmap, int stripe_nsectors);
202void raid56_parity_submit_scrub_rbio(struct btrfs_raid_bio *rbio);
203
204void raid56_parity_cache_data_pages(struct btrfs_raid_bio *rbio,
205 struct page **data_pages, u64 data_logical);
206
207int btrfs_alloc_stripe_hash_table(struct btrfs_fs_info *info);
208void btrfs_free_stripe_hash_table(struct btrfs_fs_info *info);
209
210#endif