Linux Audio

Check our new training course

Linux kernel drivers training

May 6-19, 2025
Register
Loading...
v4.17
  1// SPDX-License-Identifier: GPL-2.0
  2#include <linux/ceph/ceph_debug.h>
  3
  4#include <linux/bug.h>
  5#include <linux/err.h>
  6#include <linux/random.h>
  7#include <linux/slab.h>
  8#include <linux/types.h>
  9
 10#include <linux/ceph/mdsmap.h>
 11#include <linux/ceph/messenger.h>
 12#include <linux/ceph/decode.h>
 13
 14#include "super.h"
 15
 16
 17/*
 18 * choose a random mds that is "up" (i.e. has a state > 0), or -1.
 19 */
 20int ceph_mdsmap_get_random_mds(struct ceph_mdsmap *m)
 21{
 22	int n = 0;
 23	int i;
 24
 25	/* special case for one mds */
 26	if (1 == m->m_num_mds && m->m_info[0].state > 0)
 27		return 0;
 28
 29	/* count */
 30	for (i = 0; i < m->m_num_mds; i++)
 31		if (m->m_info[i].state > 0)
 32			n++;
 33	if (n == 0)
 34		return -1;
 35
 36	/* pick */
 37	n = prandom_u32() % n;
 
 38	i = 0;
 39	for (i = 0; n > 0; i++, n--)
 40		while (m->m_info[i].state <= 0)
 41			i++;
 42
 43	return i;
 44}
 45
 46#define __decode_and_drop_type(p, end, type, bad)		\
 47	do {							\
 48		if (*p + sizeof(type) > end)			\
 49			goto bad;				\
 50		*p += sizeof(type);				\
 51	} while (0)
 52
 53#define __decode_and_drop_set(p, end, type, bad)		\
 54	do {							\
 55		u32 n;						\
 56		size_t need;					\
 57		ceph_decode_32_safe(p, end, n, bad);		\
 58		need = sizeof(type) * n;			\
 59		ceph_decode_need(p, end, need, bad);		\
 60		*p += need;					\
 61	} while (0)
 62
 63#define __decode_and_drop_map(p, end, ktype, vtype, bad)	\
 64	do {							\
 65		u32 n;						\
 66		size_t need;					\
 67		ceph_decode_32_safe(p, end, n, bad);		\
 68		need = (sizeof(ktype) + sizeof(vtype)) * n;	\
 69		ceph_decode_need(p, end, need, bad);		\
 70		*p += need;					\
 71	} while (0)
 72
 73
 74static int __decode_and_drop_compat_set(void **p, void* end)
 75{
 76	int i;
 77	/* compat, ro_compat, incompat*/
 78	for (i = 0; i < 3; i++) {
 79		u32 n;
 80		ceph_decode_need(p, end, sizeof(u64) + sizeof(u32), bad);
 81		/* mask */
 82		*p += sizeof(u64);
 83		/* names (map<u64, string>) */
 84		n = ceph_decode_32(p);
 85		while (n-- > 0) {
 86			u32 len;
 87			ceph_decode_need(p, end, sizeof(u64) + sizeof(u32),
 88					 bad);
 89			*p += sizeof(u64);
 90			len = ceph_decode_32(p);
 91			ceph_decode_need(p, end, len, bad);
 92			*p += len;
 93		}
 94	}
 95	return 0;
 96bad:
 97	return -1;
 98}
 99
100/*
101 * Decode an MDS map
102 *
103 * Ignore any fields we don't care about (there are quite a few of
104 * them).
105 */
106struct ceph_mdsmap *ceph_mdsmap_decode(void **p, void *end)
107{
108	struct ceph_mdsmap *m;
109	const void *start = *p;
110	int i, j, n;
111	int err = -EINVAL;
112	u8 mdsmap_v, mdsmap_cv;
113	u16 mdsmap_ev;
114
115	m = kzalloc(sizeof(*m), GFP_NOFS);
116	if (!m)
117		return ERR_PTR(-ENOMEM);
118
119	ceph_decode_need(p, end, 1 + 1, bad);
120	mdsmap_v = ceph_decode_8(p);
121	mdsmap_cv = ceph_decode_8(p);
122	if (mdsmap_v >= 4) {
123	       u32 mdsmap_len;
124	       ceph_decode_32_safe(p, end, mdsmap_len, bad);
125	       if (end < *p + mdsmap_len)
126		       goto bad;
127	       end = *p + mdsmap_len;
128	}
129
130	ceph_decode_need(p, end, 8*sizeof(u32) + sizeof(u64), bad);
131	m->m_epoch = ceph_decode_32(p);
132	m->m_client_epoch = ceph_decode_32(p);
133	m->m_last_failure = ceph_decode_32(p);
134	m->m_root = ceph_decode_32(p);
135	m->m_session_timeout = ceph_decode_32(p);
136	m->m_session_autoclose = ceph_decode_32(p);
137	m->m_max_file_size = ceph_decode_64(p);
138	m->m_max_mds = ceph_decode_32(p);
139	m->m_num_mds = m->m_max_mds;
140
141	m->m_info = kcalloc(m->m_num_mds, sizeof(*m->m_info), GFP_NOFS);
142	if (!m->m_info)
143		goto nomem;
144
145	/* pick out active nodes from mds_info (state > 0) */
146	n = ceph_decode_32(p);
147	for (i = 0; i < n; i++) {
148		u64 global_id;
149		u32 namelen;
150		s32 mds, inc, state;
151		u64 state_seq;
152		u8 info_v;
153		void *info_end = NULL;
154		struct ceph_entity_addr addr;
155		u32 num_export_targets;
156		void *pexport_targets = NULL;
157		struct ceph_timespec laggy_since;
158		struct ceph_mds_info *info;
159
160		ceph_decode_need(p, end, sizeof(u64) + 1, bad);
161		global_id = ceph_decode_64(p);
162		info_v= ceph_decode_8(p);
163		if (info_v >= 4) {
164			u32 info_len;
165			u8 info_cv;
166			ceph_decode_need(p, end, 1 + sizeof(u32), bad);
167			info_cv = ceph_decode_8(p);
168			info_len = ceph_decode_32(p);
169			info_end = *p + info_len;
170			if (info_end > end)
171				goto bad;
172		}
173
174		ceph_decode_need(p, end, sizeof(u64) + sizeof(u32), bad);
175		*p += sizeof(u64);
176		namelen = ceph_decode_32(p);  /* skip mds name */
177		*p += namelen;
178
179		ceph_decode_need(p, end,
180				 4*sizeof(u32) + sizeof(u64) +
181				 sizeof(addr) + sizeof(struct ceph_timespec),
182				 bad);
183		mds = ceph_decode_32(p);
184		inc = ceph_decode_32(p);
185		state = ceph_decode_32(p);
186		state_seq = ceph_decode_64(p);
187		ceph_decode_copy(p, &addr, sizeof(addr));
188		ceph_decode_addr(&addr);
189		ceph_decode_copy(p, &laggy_since, sizeof(laggy_since));
190		*p += sizeof(u32);
191		ceph_decode_32_safe(p, end, namelen, bad);
192		*p += namelen;
193		if (info_v >= 2) {
194			ceph_decode_32_safe(p, end, num_export_targets, bad);
195			pexport_targets = *p;
196			*p += num_export_targets * sizeof(u32);
197		} else {
198			num_export_targets = 0;
199		}
200
201		if (info_end && *p != info_end) {
202			if (*p > info_end)
203				goto bad;
204			*p = info_end;
205		}
206
207		dout("mdsmap_decode %d/%d %lld mds%d.%d %s %s\n",
208		     i+1, n, global_id, mds, inc,
209		     ceph_pr_addr(&addr.in_addr),
210		     ceph_mds_state_name(state));
211
212		if (mds < 0 || state <= 0)
213			continue;
214
215		if (mds >= m->m_num_mds) {
216			int new_num = max(mds + 1, m->m_num_mds * 2);
217			void *new_m_info = krealloc(m->m_info,
218						new_num * sizeof(*m->m_info),
219						GFP_NOFS | __GFP_ZERO);
220			if (!new_m_info)
221				goto nomem;
222			m->m_info = new_m_info;
223			m->m_num_mds = new_num;
224		}
225
226		info = &m->m_info[mds];
227		info->global_id = global_id;
228		info->state = state;
229		info->addr = addr;
230		info->laggy = (laggy_since.tv_sec != 0 ||
231			       laggy_since.tv_nsec != 0);
232		info->num_export_targets = num_export_targets;
233		if (num_export_targets) {
234			info->export_targets = kcalloc(num_export_targets,
235						       sizeof(u32), GFP_NOFS);
236			if (!info->export_targets)
237				goto nomem;
238			for (j = 0; j < num_export_targets; j++)
239				info->export_targets[j] =
240				       ceph_decode_32(&pexport_targets);
241		} else {
242			info->export_targets = NULL;
243		}
244	}
245	if (m->m_num_mds > m->m_max_mds) {
246		/* find max up mds */
247		for (i = m->m_num_mds; i >= m->m_max_mds; i--) {
248			if (i == 0 || m->m_info[i-1].state > 0)
249				break;
250		}
251		m->m_num_mds = i;
252	}
253
254	/* pg_pools */
255	ceph_decode_32_safe(p, end, n, bad);
256	m->m_num_data_pg_pools = n;
257	m->m_data_pg_pools = kcalloc(n, sizeof(u64), GFP_NOFS);
258	if (!m->m_data_pg_pools)
259		goto nomem;
260	ceph_decode_need(p, end, sizeof(u64)*(n+1), bad);
261	for (i = 0; i < n; i++)
262		m->m_data_pg_pools[i] = ceph_decode_64(p);
263	m->m_cas_pg_pool = ceph_decode_64(p);
264	m->m_enabled = m->m_epoch > 1;
265
266	mdsmap_ev = 1;
267	if (mdsmap_v >= 2) {
268		ceph_decode_16_safe(p, end, mdsmap_ev, bad_ext);
269	}
270	if (mdsmap_ev >= 3) {
271		if (__decode_and_drop_compat_set(p, end) < 0)
272			goto bad_ext;
273	}
274	/* metadata_pool */
275	if (mdsmap_ev < 5) {
276		__decode_and_drop_type(p, end, u32, bad_ext);
277	} else {
278		__decode_and_drop_type(p, end, u64, bad_ext);
279	}
280
281	/* created + modified + tableserver */
282	__decode_and_drop_type(p, end, struct ceph_timespec, bad_ext);
283	__decode_and_drop_type(p, end, struct ceph_timespec, bad_ext);
284	__decode_and_drop_type(p, end, u32, bad_ext);
285
286	/* in */
287	{
288		int num_laggy = 0;
289		ceph_decode_32_safe(p, end, n, bad_ext);
290		ceph_decode_need(p, end, sizeof(u32) * n, bad_ext);
291
292		for (i = 0; i < n; i++) {
293			s32 mds = ceph_decode_32(p);
294			if (mds >= 0 && mds < m->m_num_mds) {
295				if (m->m_info[mds].laggy)
296					num_laggy++;
297			}
298		}
299		m->m_num_laggy = num_laggy;
300
301		if (n > m->m_num_mds) {
302			void *new_m_info = krealloc(m->m_info,
303						    n * sizeof(*m->m_info),
304						    GFP_NOFS | __GFP_ZERO);
305			if (!new_m_info)
306				goto nomem;
307			m->m_info = new_m_info;
308		}
309		m->m_num_mds = n;
310	}
311
312	/* inc */
313	__decode_and_drop_map(p, end, u32, u32, bad_ext);
314	/* up */
315	__decode_and_drop_map(p, end, u32, u64, bad_ext);
316	/* failed */
317	__decode_and_drop_set(p, end, u32, bad_ext);
318	/* stopped */
319	__decode_and_drop_set(p, end, u32, bad_ext);
320
321	if (mdsmap_ev >= 4) {
322		/* last_failure_osd_epoch */
323		__decode_and_drop_type(p, end, u32, bad_ext);
324	}
325	if (mdsmap_ev >= 6) {
326		/* ever_allowed_snaps */
327		__decode_and_drop_type(p, end, u8, bad_ext);
328		/* explicitly_allowed_snaps */
329		__decode_and_drop_type(p, end, u8, bad_ext);
330	}
331	if (mdsmap_ev >= 7) {
332		/* inline_data_enabled */
333		__decode_and_drop_type(p, end, u8, bad_ext);
334	}
335	if (mdsmap_ev >= 8) {
336		u32 name_len;
337		/* enabled */
338		ceph_decode_8_safe(p, end, m->m_enabled, bad_ext);
339		ceph_decode_32_safe(p, end, name_len, bad_ext);
340		ceph_decode_need(p, end, name_len, bad_ext);
341		*p += name_len;
342	}
343	/* damaged */
344	if (mdsmap_ev >= 9) {
345		size_t need;
346		ceph_decode_32_safe(p, end, n, bad_ext);
347		need = sizeof(u32) * n;
348		ceph_decode_need(p, end, need, bad_ext);
349		*p += need;
350		m->m_damaged = n > 0;
351	} else {
352		m->m_damaged = false;
353	}
354bad_ext:
355	*p = end;
356	dout("mdsmap_decode success epoch %u\n", m->m_epoch);
357	return m;
358nomem:
 
359	err = -ENOMEM;
360	goto out_err;
361bad:
362	pr_err("corrupt mdsmap\n");
363	print_hex_dump(KERN_DEBUG, "mdsmap: ",
364		       DUMP_PREFIX_OFFSET, 16, 1,
365		       start, end - start, true);
366out_err:
367	ceph_mdsmap_destroy(m);
368	return ERR_PTR(err);
369}
370
371void ceph_mdsmap_destroy(struct ceph_mdsmap *m)
372{
373	int i;
374
375	for (i = 0; i < m->m_num_mds; i++)
376		kfree(m->m_info[i].export_targets);
377	kfree(m->m_info);
378	kfree(m->m_data_pg_pools);
379	kfree(m);
380}
381
382bool ceph_mdsmap_is_cluster_available(struct ceph_mdsmap *m)
383{
384	int i, nr_active = 0;
385	if (!m->m_enabled)
386		return false;
387	if (m->m_damaged)
388		return false;
389	if (m->m_num_laggy > 0)
390		return false;
391	for (i = 0; i < m->m_num_mds; i++) {
392		if (m->m_info[i].state == CEPH_MDS_STATE_ACTIVE)
393			nr_active++;
394	}
395	return nr_active > 0;
396}
v3.5.6
 
  1#include <linux/ceph/ceph_debug.h>
  2
  3#include <linux/bug.h>
  4#include <linux/err.h>
  5#include <linux/random.h>
  6#include <linux/slab.h>
  7#include <linux/types.h>
  8
  9#include <linux/ceph/mdsmap.h>
 10#include <linux/ceph/messenger.h>
 11#include <linux/ceph/decode.h>
 12
 13#include "super.h"
 14
 15
 16/*
 17 * choose a random mds that is "up" (i.e. has a state > 0), or -1.
 18 */
 19int ceph_mdsmap_get_random_mds(struct ceph_mdsmap *m)
 20{
 21	int n = 0;
 22	int i;
 23	char r;
 
 
 
 24
 25	/* count */
 26	for (i = 0; i < m->m_max_mds; i++)
 27		if (m->m_info[i].state > 0)
 28			n++;
 29	if (n == 0)
 30		return -1;
 31
 32	/* pick */
 33	get_random_bytes(&r, 1);
 34	n = r % n;
 35	i = 0;
 36	for (i = 0; n > 0; i++, n--)
 37		while (m->m_info[i].state <= 0)
 38			i++;
 39
 40	return i;
 41}
 42
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 43/*
 44 * Decode an MDS map
 45 *
 46 * Ignore any fields we don't care about (there are quite a few of
 47 * them).
 48 */
 49struct ceph_mdsmap *ceph_mdsmap_decode(void **p, void *end)
 50{
 51	struct ceph_mdsmap *m;
 52	const void *start = *p;
 53	int i, j, n;
 54	int err = -EINVAL;
 55	u16 version;
 
 56
 57	m = kzalloc(sizeof(*m), GFP_NOFS);
 58	if (m == NULL)
 59		return ERR_PTR(-ENOMEM);
 60
 61	ceph_decode_16_safe(p, end, version, bad);
 
 
 
 
 
 
 
 
 
 62
 63	ceph_decode_need(p, end, 8*sizeof(u32) + sizeof(u64), bad);
 64	m->m_epoch = ceph_decode_32(p);
 65	m->m_client_epoch = ceph_decode_32(p);
 66	m->m_last_failure = ceph_decode_32(p);
 67	m->m_root = ceph_decode_32(p);
 68	m->m_session_timeout = ceph_decode_32(p);
 69	m->m_session_autoclose = ceph_decode_32(p);
 70	m->m_max_file_size = ceph_decode_64(p);
 71	m->m_max_mds = ceph_decode_32(p);
 
 72
 73	m->m_info = kcalloc(m->m_max_mds, sizeof(*m->m_info), GFP_NOFS);
 74	if (m->m_info == NULL)
 75		goto badmem;
 76
 77	/* pick out active nodes from mds_info (state > 0) */
 78	n = ceph_decode_32(p);
 79	for (i = 0; i < n; i++) {
 80		u64 global_id;
 81		u32 namelen;
 82		s32 mds, inc, state;
 83		u64 state_seq;
 84		u8 infoversion;
 
 85		struct ceph_entity_addr addr;
 86		u32 num_export_targets;
 87		void *pexport_targets = NULL;
 88		struct ceph_timespec laggy_since;
 
 89
 90		ceph_decode_need(p, end, sizeof(u64)*2 + 1 + sizeof(u32), bad);
 91		global_id = ceph_decode_64(p);
 92		infoversion = ceph_decode_8(p);
 
 
 
 
 
 
 
 
 
 
 
 
 93		*p += sizeof(u64);
 94		namelen = ceph_decode_32(p);  /* skip mds name */
 95		*p += namelen;
 96
 97		ceph_decode_need(p, end,
 98				 4*sizeof(u32) + sizeof(u64) +
 99				 sizeof(addr) + sizeof(struct ceph_timespec),
100				 bad);
101		mds = ceph_decode_32(p);
102		inc = ceph_decode_32(p);
103		state = ceph_decode_32(p);
104		state_seq = ceph_decode_64(p);
105		ceph_decode_copy(p, &addr, sizeof(addr));
106		ceph_decode_addr(&addr);
107		ceph_decode_copy(p, &laggy_since, sizeof(laggy_since));
108		*p += sizeof(u32);
109		ceph_decode_32_safe(p, end, namelen, bad);
110		*p += namelen;
111		if (infoversion >= 2) {
112			ceph_decode_32_safe(p, end, num_export_targets, bad);
113			pexport_targets = *p;
114			*p += num_export_targets * sizeof(u32);
115		} else {
116			num_export_targets = 0;
117		}
118
 
 
 
 
 
 
119		dout("mdsmap_decode %d/%d %lld mds%d.%d %s %s\n",
120		     i+1, n, global_id, mds, inc,
121		     ceph_pr_addr(&addr.in_addr),
122		     ceph_mds_state_name(state));
123		if (mds >= 0 && mds < m->m_max_mds && state > 0) {
124			m->m_info[mds].global_id = global_id;
125			m->m_info[mds].state = state;
126			m->m_info[mds].addr = addr;
127			m->m_info[mds].laggy =
128				(laggy_since.tv_sec != 0 ||
129				 laggy_since.tv_nsec != 0);
130			m->m_info[mds].num_export_targets = num_export_targets;
131			if (num_export_targets) {
132				m->m_info[mds].export_targets =
133					kcalloc(num_export_targets, sizeof(u32),
134						GFP_NOFS);
135				for (j = 0; j < num_export_targets; j++)
136					m->m_info[mds].export_targets[j] =
137					       ceph_decode_32(&pexport_targets);
138			} else {
139				m->m_info[mds].export_targets = NULL;
140			}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
141		}
 
142	}
143
144	/* pg_pools */
145	ceph_decode_32_safe(p, end, n, bad);
146	m->m_num_data_pg_pools = n;
147	m->m_data_pg_pools = kcalloc(n, sizeof(u32), GFP_NOFS);
148	if (!m->m_data_pg_pools)
149		goto badmem;
150	ceph_decode_need(p, end, sizeof(u32)*(n+1), bad);
151	for (i = 0; i < n; i++)
152		m->m_data_pg_pools[i] = ceph_decode_32(p);
153	m->m_cas_pg_pool = ceph_decode_32(p);
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
154
155	/* ok, we don't care about the rest. */
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
156	dout("mdsmap_decode success epoch %u\n", m->m_epoch);
157	return m;
158
159badmem:
160	err = -ENOMEM;
 
161bad:
162	pr_err("corrupt mdsmap\n");
163	print_hex_dump(KERN_DEBUG, "mdsmap: ",
164		       DUMP_PREFIX_OFFSET, 16, 1,
165		       start, end - start, true);
 
166	ceph_mdsmap_destroy(m);
167	return ERR_PTR(-EINVAL);
168}
169
170void ceph_mdsmap_destroy(struct ceph_mdsmap *m)
171{
172	int i;
173
174	for (i = 0; i < m->m_max_mds; i++)
175		kfree(m->m_info[i].export_targets);
176	kfree(m->m_info);
177	kfree(m->m_data_pg_pools);
178	kfree(m);
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
179}