Linux Audio

Check our new training course

Loading...
Note: File does not exist in v4.6.
  1/* SPDX-License-Identifier: GPL-2.0 */
  2#ifndef _BCACHEFS_EXTENTS_FORMAT_H
  3#define _BCACHEFS_EXTENTS_FORMAT_H
  4
  5/*
  6 * In extent bkeys, the value is a list of pointers (bch_extent_ptr), optionally
  7 * preceded by checksum/compression information (bch_extent_crc32 or
  8 * bch_extent_crc64).
  9 *
 10 * One major determining factor in the format of extents is how we handle and
 11 * represent extents that have been partially overwritten and thus trimmed:
 12 *
 13 * If an extent is not checksummed or compressed, when the extent is trimmed we
 14 * don't have to remember the extent we originally allocated and wrote: we can
 15 * merely adjust ptr->offset to point to the start of the data that is currently
 16 * live. The size field in struct bkey records the current (live) size of the
 17 * extent, and is also used to mean "size of region on disk that we point to" in
 18 * this case.
 19 *
 20 * Thus an extent that is not checksummed or compressed will consist only of a
 21 * list of bch_extent_ptrs, with none of the fields in
 22 * bch_extent_crc32/bch_extent_crc64.
 23 *
 24 * When an extent is checksummed or compressed, it's not possible to read only
 25 * the data that is currently live: we have to read the entire extent that was
 26 * originally written, and then return only the part of the extent that is
 27 * currently live.
 28 *
 29 * Thus, in addition to the current size of the extent in struct bkey, we need
 30 * to store the size of the originally allocated space - this is the
 31 * compressed_size and uncompressed_size fields in bch_extent_crc32/64. Also,
 32 * when the extent is trimmed, instead of modifying the offset field of the
 33 * pointer, we keep a second smaller offset field - "offset into the original
 34 * extent of the currently live region".
 35 *
 36 * The other major determining factor is replication and data migration:
 37 *
 38 * Each pointer may have its own bch_extent_crc32/64. When doing a replicated
 39 * write, we will initially write all the replicas in the same format, with the
 40 * same checksum type and compression format - however, when copygc runs later (or
 41 * tiering/cache promotion, anything that moves data), it is not in general
 42 * going to rewrite all the pointers at once - one of the replicas may be in a
 43 * bucket on one device that has very little fragmentation while another lives
 44 * in a bucket that has become heavily fragmented, and thus is being rewritten
 45 * sooner than the rest.
 46 *
 47 * Thus it will only move a subset of the pointers (or in the case of
 48 * tiering/cache promotion perhaps add a single pointer without dropping any
 49 * current pointers), and if the extent has been partially overwritten it must
 50 * write only the currently live portion (or copygc would not be able to reduce
 51 * fragmentation!) - which necessitates a different bch_extent_crc format for
 52 * the new pointer.
 53 *
 54 * But in the interests of space efficiency, we don't want to store one
 55 * bch_extent_crc for each pointer if we don't have to.
 56 *
 57 * Thus, a bch_extent consists of bch_extent_crc32s, bch_extent_crc64s, and
 58 * bch_extent_ptrs appended arbitrarily one after the other. We determine the
 59 * type of a given entry with a scheme similar to utf8 (except we're encoding a
 60 * type, not a size), encoding the type in the position of the first set bit:
 61 *
 62 * bch_extent_crc32	- 0b1
 63 * bch_extent_ptr	- 0b10
 64 * bch_extent_crc64	- 0b100
 65 *
 66 * We do it this way because bch_extent_crc32 is _very_ constrained on bits (and
 67 * bch_extent_crc64 is the least constrained).
 68 *
 69 * Then, each bch_extent_crc32/64 applies to the pointers that follow after it,
 70 * until the next bch_extent_crc32/64.
 71 *
 72 * If there are no bch_extent_crcs preceding a bch_extent_ptr, then that pointer
 73 * is neither checksummed nor compressed.
 74 */
 75
/*
 * X-macro list of extent entry types and their type numbers: x(name, nr).
 * The number is what is stored in each entry's type field on disk (see the
 * first-set-bit tagging scheme described at the top of this file).
 */
#define BCH_EXTENT_ENTRY_TYPES()		\
	x(ptr,			0)		\
	x(crc32,		1)		\
	x(crc64,		2)		\
	x(crc128,		3)		\
	x(stripe_ptr,		4)		\
	x(rebalance,		5)
/* One greater than the highest type number in the list above: */
#define BCH_EXTENT_ENTRY_MAX	6
 84
/*
 * Expands the x-macro list into BCH_EXTENT_ENTRY_ptr = 0,
 * BCH_EXTENT_ENTRY_crc32 = 1, etc.
 */
enum bch_extent_entry_type {
#define x(f, n) BCH_EXTENT_ENTRY_##f = n,
	BCH_EXTENT_ENTRY_TYPES()
#undef x
};
 90
/*
 * Smallest checksum/compression entry, used when sizes and offset fit in
 * 7 bits and no nonce is needed.
 *
 * Compressed/uncompressed size are stored biased by 1 (the leading
 * underscore on those fields marks the raw, biased on-disk value).
 *
 * The bitfields are declared in reverse order under __BIG_ENDIAN_BITFIELD
 * - presumably so the on-disk format comes out the same after byte
 * swabbing on big endian hosts (NOTE(review): confirm against the bkey
 * swab code).
 */
struct bch_extent_crc32 {
#if defined(__LITTLE_ENDIAN_BITFIELD)
	__u32			type:2,
				_compressed_size:7,
				_uncompressed_size:7,
				offset:7,
				_unused:1,
				csum_type:4,
				compression_type:4;
	__u32			csum;
#elif defined (__BIG_ENDIAN_BITFIELD)
	__u32			csum;
	__u32			compression_type:4,
				csum_type:4,
				_unused:1,
				offset:7,
				_uncompressed_size:7,
				_compressed_size:7,
				type:2;
#endif
} __packed __aligned(8);

/* Size/offset fields above are 7 bits wide: */
#define CRC32_SIZE_MAX		(1U << 7)
/* crc32 entries have no nonce field: */
#define CRC32_NONCE_MAX		0
116
/*
 * Mid-sized checksum/compression entry: 9 bit sizes/offset, a 10 bit
 * nonce, and an 80 bit checksum split across csum_hi (high 16 bits, in
 * the bitfield word) and csum_lo (low 64 bits).
 *
 * Same conventions as bch_extent_crc32: sizes stored biased by 1,
 * bitfield order mirrored for big endian.
 */
struct bch_extent_crc64 {
#if defined(__LITTLE_ENDIAN_BITFIELD)
	__u64			type:3,
				_compressed_size:9,
				_uncompressed_size:9,
				offset:9,
				nonce:10,
				csum_type:4,
				compression_type:4,
				csum_hi:16;
#elif defined (__BIG_ENDIAN_BITFIELD)
	__u64			csum_hi:16,
				compression_type:4,
				csum_type:4,
				nonce:10,
				offset:9,
				_uncompressed_size:9,
				_compressed_size:9,
				type:3;
#endif
	__u64			csum_lo;
} __packed __aligned(8);

/* Size/offset fields above are 9 bits wide: */
#define CRC64_SIZE_MAX		(1U << 9)
/* nonce is a 10 bit field: */
#define CRC64_NONCE_MAX		((1U << 10) - 1)
142
/*
 * Largest checksum/compression entry: 13 bit sizes/offset/nonce, and a
 * full struct bch_csum for the checksum.
 *
 * Same conventions as bch_extent_crc32: sizes stored biased by 1,
 * bitfield order mirrored for big endian.
 */
struct bch_extent_crc128 {
#if defined(__LITTLE_ENDIAN_BITFIELD)
	__u64			type:4,
				_compressed_size:13,
				_uncompressed_size:13,
				offset:13,
				nonce:13,
				csum_type:4,
				compression_type:4;
#elif defined (__BIG_ENDIAN_BITFIELD)
	__u64			compression_type:4,
				csum_type:4,
				nonce:13,
				offset:13,
				_uncompressed_size:13,
				_compressed_size:13,
				type:4;
#endif
	struct bch_csum		csum;
} __packed __aligned(8);

/* Size/offset/nonce fields above are 13 bits wide: */
#define CRC128_SIZE_MAX		(1U << 13)
#define CRC128_NONCE_MAX	((1U << 13) - 1)
166
/*
 * A pointer to one replica of an extent's data.
 *
 * @cached	- cached copy of the data, not a durable replica
 * @unwritten	- pointer hasn't been written to, just reserved
 *		  (NOTE(review): older comments called this "@reservation";
 *		  no field of that name exists)
 * @offset	- 44 bits, giving 8 petabytes of addressable space per the
 *		  inline comment
 * @dev		- device index
 * @gen		- generation number
 */
struct bch_extent_ptr {
#if defined(__LITTLE_ENDIAN_BITFIELD)
	__u64			type:1,
				cached:1,
				unused:1,
				unwritten:1,
				offset:44, /* 8 petabytes */
				dev:8,
				gen:8;
#elif defined (__BIG_ENDIAN_BITFIELD)
	__u64			gen:8,
				dev:8,
				offset:44,
				unwritten:1,
				unused:1,
				cached:1,
				type:1;
#endif
} __packed __aligned(8);
189
/*
 * Links an extent to an erasure coded stripe.
 *
 * @block	- block within the stripe
 * @redundancy	- redundancy of the stripe
 * @idx		- stripe index
 * (NOTE(review): field semantics inferred from names - confirm against the
 * stripe code.  Also note this entry has no __packed __aligned(8), unlike
 * the crc/ptr entries above.)
 */
struct bch_extent_stripe_ptr {
#if defined(__LITTLE_ENDIAN_BITFIELD)
	__u64			type:5,
				block:8,
				redundancy:4,
				idx:47;
#elif defined (__BIG_ENDIAN_BITFIELD)
	__u64			idx:47,
				redundancy:4,
				block:8,
				type:5;
#endif
};
203
/*
 * Per-extent rebalance options, stored in the extent itself.
 *
 * @compression	- enum bch_compression_opt (per the inline comment)
 * @target	- 16 bit target id
 */
struct bch_extent_rebalance {
#if defined(__LITTLE_ENDIAN_BITFIELD)
	__u64			type:6,
				unused:34,
				compression:8, /* enum bch_compression_opt */
				target:16;
#elif defined (__BIG_ENDIAN_BITFIELD)
	__u64			target:16,
				compression:8,
				unused:34,
				type:6;
#endif
};
217
/*
 * An extent entry of any of the types above.  @type overlays the word
 * containing the low bits of the entry, where the first-set-bit type tag
 * lives: on little endian or 64 bit hosts that is the first long; on
 * 32 bit big endian hosts the low bits are in the second long, hence the
 * pad member.
 */
union bch_extent_entry {
#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ ||  __BITS_PER_LONG == 64
	unsigned long			type;
#elif __BITS_PER_LONG == 32
	struct {
		unsigned long		pad;
		unsigned long		type;
	};
#else
#error edit for your odd byteorder.
#endif

#define x(f, n) struct bch_extent_##f	f;
	BCH_EXTENT_ENTRY_TYPES()
#undef x
};
234
/*
 * Btree node pointer (v1): the value is just a list of replica pointers.
 * _data[0] is a zero length array aliasing start[], for accessing the
 * value as raw u64s.
 */
struct bch_btree_ptr {
	struct bch_val		v;

	__u64			_data[0];
	struct bch_extent_ptr	start[];
} __packed __aligned(8);
241
/*
 * Btree node pointer (v2): carries extra metadata ahead of the pointer
 * list.
 *
 * @mem_ptr		- native endian, unlike the __le fields
 *			  (NOTE(review): looks runtime-only rather than
 *			  on-disk state - confirm)
 * @seq			- node sequence number
 * @sectors_written	- how much of the node has been written
 * @flags		- see the LE16_BITMASK()s below
 * @min_key		- start of this node's key range
 */
struct bch_btree_ptr_v2 {
	struct bch_val		v;

	__u64			mem_ptr;
	__le64			seq;
	__le16			sectors_written;
	__le16			flags;
	struct bpos		min_key;
	__u64			_data[0];
	struct bch_extent_ptr	start[];
} __packed __aligned(8);

/* Bit 0 of bch_btree_ptr_v2.flags: */
LE16_BITMASK(BTREE_PTR_RANGE_UPDATED,	struct bch_btree_ptr_v2, flags, 0, 1);
255
/*
 * Value of an extent key: a packed list of bch_extent_entrys (crc
 * entries, pointers, etc.), laid out as described at the top of this
 * file.  _data[0] aliases start[] for raw u64 access.
 */
struct bch_extent {
	struct bch_val		v;

	__u64			_data[0];
	union bch_extent_entry	start[];
} __packed __aligned(8);
262
/* Maximum size (in u64s) a single pointer could be: */
#define BKEY_EXTENT_PTR_U64s_MAX\
	((sizeof(struct bch_extent_crc128) +			\
	  sizeof(struct bch_extent_ptr)) / sizeof(__u64))

/* Maximum possible size of an entire extent value: */
#define BKEY_EXTENT_VAL_U64s_MAX				\
	(1 + BKEY_EXTENT_PTR_U64s_MAX * (BCH_REPLICAS_MAX + 1))

/* Maximum possible size of an entire extent, key + value: */
#define BKEY_EXTENT_U64s_MAX		(BKEY_U64s + BKEY_EXTENT_VAL_U64s_MAX)

/* Btree pointers don't carry around checksums: */
#define BKEY_BTREE_PTR_VAL_U64s_MAX				\
	((sizeof(struct bch_btree_ptr_v2) +			\
	  sizeof(struct bch_extent_ptr) * BCH_REPLICAS_MAX) / sizeof(__u64))
#define BKEY_BTREE_PTR_U64s_MAX					\
	(BKEY_U64s + BKEY_BTREE_PTR_VAL_U64s_MAX)
281
/*
 * A reservation: space accounted for but not yet written, with no
 * pointers.
 *
 * @nr_replicas	- how many replicas' worth of space is reserved
 * @pad		- explicit padding out to an 8 byte boundary
 */
struct bch_reservation {
	struct bch_val		v;

	__le32			generation;
	__u8			nr_replicas;
	__u8			pad[3];
} __packed __aligned(8);
289
290struct bch_inline_data {
291	struct bch_val		v;
292	u8			data[];
293};
294
295#endif /* _BCACHEFS_EXTENTS_FORMAT_H */