// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2019 Facebook  */
#include <linux/rculist.h>
#include <linux/list.h>
#include <linux/hash.h>
#include <linux/types.h>
#include <linux/spinlock.h>
#include <linux/bpf.h>
#include <linux/btf.h>
#include <linux/btf_ids.h>
#include <linux/bpf_local_storage.h>
#include <net/bpf_sk_storage.h>
#include <net/sock.h>
#include <uapi/linux/sock_diag.h>
#include <uapi/linux/btf.h>
#include <linux/rcupdate_trace.h>

DEFINE_BPF_STORAGE_CACHE(sk_cache);

static struct bpf_local_storage_data *
bpf_sk_storage_lookup(struct sock *sk, struct bpf_map *map, bool cacheit_lockit)
{
	struct bpf_local_storage *sk_storage;
	struct bpf_local_storage_map *smap;

	sk_storage =
		rcu_dereference_check(sk->sk_bpf_storage, bpf_rcu_lock_held());
	if (!sk_storage)
		return NULL;

	smap = (struct bpf_local_storage_map *)map;
	return bpf_local_storage_lookup(sk_storage, smap, cacheit_lockit);
}

static int bpf_sk_storage_del(struct sock *sk, struct bpf_map *map)
{
	struct bpf_local_storage_data *sdata;

	sdata = bpf_sk_storage_lookup(sk, map, false);
	if (!sdata)
		return -ENOENT;

	bpf_selem_unlink(SELEM(sdata), false);

	return 0;
}

/* Called by __sk_destruct() & bpf_sk_storage_clone() */
void bpf_sk_storage_free(struct sock *sk)
{
	struct bpf_local_storage *sk_storage;

	rcu_read_lock();
	sk_storage = rcu_dereference(sk->sk_bpf_storage);
	if (!sk_storage) {
		rcu_read_unlock();
		return;
	}

	bpf_local_storage_destroy(sk_storage);
	rcu_read_unlock();
}

static void bpf_sk_storage_map_free(struct bpf_map *map)
{
	bpf_local_storage_map_free(map, &sk_cache, NULL);
}

static struct bpf_map *bpf_sk_storage_map_alloc(union bpf_attr *attr)
{
	return bpf_local_storage_map_alloc(attr, &sk_cache, false);
}

static int notsupp_get_next_key(struct bpf_map *map, void *key,
				void *next_key)
{
	return -ENOTSUPP;
}

static void *bpf_fd_sk_storage_lookup_elem(struct bpf_map *map, void *key)
{
	struct bpf_local_storage_data *sdata;
	struct socket *sock;
	int fd, err;

	fd = *(int *)key;
	sock = sockfd_lookup(fd, &err);
	if (sock) {
		sdata = bpf_sk_storage_lookup(sock->sk, map, true);
		sockfd_put(sock);
		return sdata ? sdata->data : NULL;
	}

	return ERR_PTR(err);
}

static long bpf_fd_sk_storage_update_elem(struct bpf_map *map, void *key,
					  void *value, u64 map_flags)
{
	struct bpf_local_storage_data *sdata;
	struct socket *sock;
	int fd, err;

	fd = *(int *)key;
	sock = sockfd_lookup(fd, &err);
	if (sock) {
		sdata = bpf_local_storage_update(
			sock->sk, (struct bpf_local_storage_map *)map, value,
			map_flags, false, GFP_ATOMIC);
		sockfd_put(sock);
		return PTR_ERR_OR_ZERO(sdata);
	}

	return err;
}

static long bpf_fd_sk_storage_delete_elem(struct bpf_map *map, void *key)
{
	struct socket *sock;
	int fd, err;

	fd = *(int *)key;
	sock = sockfd_lookup(fd, &err);
	if (sock) {
		err = bpf_sk_storage_del(sock->sk, map);
		sockfd_put(sock);
		return err;
	}

	return err;
}

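/* A minimal user-space sketch (not part of this kernel file): the three
 * bpf_fd_sk_storage_*_elem() callbacks above are what the ordinary map
 * syscalls reach for a BPF_MAP_TYPE_SK_STORAGE map, with a socket file
 * descriptor of the calling process as the key.  The map fd is assumed to
 * come from an already loaded map; the value layout "struct conn_stats" is
 * made up for the example.
 */
#include <errno.h>
#include <sys/socket.h>
#include <bpf/bpf.h>		/* libbpf syscall wrappers */

struct conn_stats {
	__u64 rx_pkts;
	__u64 tx_pkts;
};

static int touch_sk_storage(int map_fd)
{
	struct conn_stats zero = {}, cur;
	int sock_fd, err;

	sock_fd = socket(AF_INET, SOCK_STREAM, 0);
	if (sock_fd < 0)
		return -errno;

	/* key is the socket fd: creates storage on this socket
	 * (bpf_fd_sk_storage_update_elem() above)
	 */
	err = bpf_map_update_elem(map_fd, &sock_fd, &zero, BPF_NOEXIST);
	if (err)
		return err;

	/* reads it back (bpf_fd_sk_storage_lookup_elem() above) */
	err = bpf_map_lookup_elem(map_fd, &sock_fd, &cur);
	if (err)
		return err;

	/* drops it again (bpf_fd_sk_storage_delete_elem() above) */
	return bpf_map_delete_elem(map_fd, &sock_fd);
}
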
static struct bpf_local_storage_elem *
bpf_sk_storage_clone_elem(struct sock *newsk,
			  struct bpf_local_storage_map *smap,
			  struct bpf_local_storage_elem *selem)
{
	struct bpf_local_storage_elem *copy_selem;

	copy_selem = bpf_selem_alloc(smap, newsk, NULL, true, false, GFP_ATOMIC);
	if (!copy_selem)
		return NULL;

	if (btf_record_has_field(smap->map.record, BPF_SPIN_LOCK))
		copy_map_value_locked(&smap->map, SDATA(copy_selem)->data,
				      SDATA(selem)->data, true);
	else
		copy_map_value(&smap->map, SDATA(copy_selem)->data,
			       SDATA(selem)->data);

	return copy_selem;
}

int bpf_sk_storage_clone(const struct sock *sk, struct sock *newsk)
{
	struct bpf_local_storage *new_sk_storage = NULL;
	struct bpf_local_storage *sk_storage;
	struct bpf_local_storage_elem *selem;
	int ret = 0;

	RCU_INIT_POINTER(newsk->sk_bpf_storage, NULL);

	rcu_read_lock();
	sk_storage = rcu_dereference(sk->sk_bpf_storage);

	if (!sk_storage || hlist_empty(&sk_storage->list))
		goto out;

	hlist_for_each_entry_rcu(selem, &sk_storage->list, snode) {
		struct bpf_local_storage_elem *copy_selem;
		struct bpf_local_storage_map *smap;
		struct bpf_map *map;

		smap = rcu_dereference(SDATA(selem)->smap);
		if (!(smap->map.map_flags & BPF_F_CLONE))
			continue;

		/* Note that for lockless listeners adding a new element
		 * here can race with cleanup in bpf_local_storage_map_free.
		 * Try to grab the map refcnt to make sure that it's still
		 * alive and to prevent concurrent removal.
		 */
		map = bpf_map_inc_not_zero(&smap->map);
		if (IS_ERR(map))
			continue;

		copy_selem = bpf_sk_storage_clone_elem(newsk, smap, selem);
		if (!copy_selem) {
			ret = -ENOMEM;
			bpf_map_put(map);
			goto out;
		}

		if (new_sk_storage) {
			bpf_selem_link_map(smap, copy_selem);
			bpf_selem_link_storage_nolock(new_sk_storage, copy_selem);
		} else {
			ret = bpf_local_storage_alloc(newsk, smap, copy_selem, GFP_ATOMIC);
			if (ret) {
				bpf_selem_free(copy_selem, smap, true);
				atomic_sub(smap->elem_size,
					   &newsk->sk_omem_alloc);
				bpf_map_put(map);
				goto out;
			}

			new_sk_storage =
				rcu_dereference(copy_selem->local_storage);
		}
		bpf_map_put(map);
	}

out:
	rcu_read_unlock();

	/* In case of an error, don't free anything explicitly here; the
	 * caller is responsible for calling bpf_sk_storage_free.
	 */

	return ret;
}

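/* A BPF-side sketch (not part of this kernel file): bpf_sk_storage_clone()
 * above only copies elements whose map was created with BPF_F_CLONE, so
 * storage attached to a listening socket follows the child socket created
 * by accept().  A BTF-defined map requesting that behaviour could look like
 * this (the value layout and map name are made up for the example):
 */
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>

struct conn_meta {
	__u64 created_ns;
	__u32 mark;
};

struct {
	__uint(type, BPF_MAP_TYPE_SK_STORAGE);
	__uint(map_flags, BPF_F_NO_PREALLOC | BPF_F_CLONE);
	__type(key, int);
	__type(value, struct conn_meta);
} sk_meta_map SEC(".maps");
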
/* *gfp_flags* is a hidden argument provided by the verifier */
BPF_CALL_5(bpf_sk_storage_get, struct bpf_map *, map, struct sock *, sk,
	   void *, value, u64, flags, gfp_t, gfp_flags)
{
	struct bpf_local_storage_data *sdata;

	WARN_ON_ONCE(!bpf_rcu_lock_held());
	if (!sk || !sk_fullsock(sk) || flags > BPF_SK_STORAGE_GET_F_CREATE)
		return (unsigned long)NULL;

	sdata = bpf_sk_storage_lookup(sk, map, true);
	if (sdata)
		return (unsigned long)sdata->data;

	if (flags == BPF_SK_STORAGE_GET_F_CREATE &&
	    /* Cannot add a new elem to a sk that is going away.
	     * Otherwise, the new elem may leak
	     * (and cause other memory issues during map
	     *  destruction).
	     */
	    refcount_inc_not_zero(&sk->sk_refcnt)) {
		sdata = bpf_local_storage_update(
			sk, (struct bpf_local_storage_map *)map, value,
			BPF_NOEXIST, false, gfp_flags);
		/* sk must be a fullsock (guaranteed by the verifier),
		 * so sock_gen_put() is unnecessary.
		 */
		sock_put(sk);
		return IS_ERR(sdata) ?
			(unsigned long)NULL : (unsigned long)sdata->data;
	}

	return (unsigned long)NULL;
}

BPF_CALL_2(bpf_sk_storage_delete, struct bpf_map *, map, struct sock *, sk)
{
	WARN_ON_ONCE(!bpf_rcu_lock_held());
	if (!sk || !sk_fullsock(sk))
		return -EINVAL;

	if (refcount_inc_not_zero(&sk->sk_refcnt)) {
		int err;

		err = bpf_sk_storage_del(sk, map);
		sock_put(sk);
		return err;
	}

	return -ENOENT;
}

static int bpf_sk_storage_charge(struct bpf_local_storage_map *smap,
				 void *owner, u32 size)
{
	struct sock *sk = (struct sock *)owner;
	int optmem_max;

	optmem_max = READ_ONCE(sock_net(sk)->core.sysctl_optmem_max);
	/* same check as in sock_kmalloc() */
	if (size <= optmem_max &&
	    atomic_read(&sk->sk_omem_alloc) + size < optmem_max) {
		atomic_add(size, &sk->sk_omem_alloc);
		return 0;
	}

	return -ENOMEM;
}

static void bpf_sk_storage_uncharge(struct bpf_local_storage_map *smap,
				    void *owner, u32 size)
{
	struct sock *sk = owner;

	atomic_sub(size, &sk->sk_omem_alloc);
}

static struct bpf_local_storage __rcu **
bpf_sk_storage_ptr(void *owner)
{
	struct sock *sk = owner;

	return &sk->sk_bpf_storage;
}

const struct bpf_map_ops sk_storage_map_ops = {
	.map_meta_equal = bpf_map_meta_equal,
	.map_alloc_check = bpf_local_storage_map_alloc_check,
	.map_alloc = bpf_sk_storage_map_alloc,
	.map_free = bpf_sk_storage_map_free,
	.map_get_next_key = notsupp_get_next_key,
	.map_lookup_elem = bpf_fd_sk_storage_lookup_elem,
	.map_update_elem = bpf_fd_sk_storage_update_elem,
	.map_delete_elem = bpf_fd_sk_storage_delete_elem,
	.map_check_btf = bpf_local_storage_map_check_btf,
	.map_btf_id = &bpf_local_storage_map_btf_id[0],
	.map_local_storage_charge = bpf_sk_storage_charge,
	.map_local_storage_uncharge = bpf_sk_storage_uncharge,
	.map_owner_storage_ptr = bpf_sk_storage_ptr,
	.map_mem_usage = bpf_local_storage_map_mem_usage,
};

const struct bpf_func_proto bpf_sk_storage_get_proto = {
	.func		= bpf_sk_storage_get,
	.gpl_only	= false,
	.ret_type	= RET_PTR_TO_MAP_VALUE_OR_NULL,
	.arg1_type	= ARG_CONST_MAP_PTR,
	.arg2_type	= ARG_PTR_TO_BTF_ID_SOCK_COMMON,
	.arg3_type	= ARG_PTR_TO_MAP_VALUE_OR_NULL,
	.arg4_type	= ARG_ANYTHING,
};

const struct bpf_func_proto bpf_sk_storage_get_cg_sock_proto = {
	.func		= bpf_sk_storage_get,
	.gpl_only	= false,
	.ret_type	= RET_PTR_TO_MAP_VALUE_OR_NULL,
	.arg1_type	= ARG_CONST_MAP_PTR,
	.arg2_type	= ARG_PTR_TO_CTX, /* context is 'struct sock' */
	.arg3_type	= ARG_PTR_TO_MAP_VALUE_OR_NULL,
	.arg4_type	= ARG_ANYTHING,
};

const struct bpf_func_proto bpf_sk_storage_delete_proto = {
	.func		= bpf_sk_storage_delete,
	.gpl_only	= false,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_CONST_MAP_PTR,
	.arg2_type	= ARG_PTR_TO_BTF_ID_SOCK_COMMON,
};

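/* A BPF-side sketch (not part of this kernel file): the protos above back
 * the bpf_sk_storage_get()/bpf_sk_storage_delete() helpers.  For example, a
 * "cgroup/sock_create" program (whose context is the new socket itself,
 * matching bpf_sk_storage_get_cg_sock_proto) could create per-socket storage
 * at socket creation time.  The map and value names are made up for the
 * example.
 */
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>

struct sock_tag {
	__u64 created_ns;
};

struct {
	__uint(type, BPF_MAP_TYPE_SK_STORAGE);
	__uint(map_flags, BPF_F_NO_PREALLOC);
	__type(key, int);
	__type(value, struct sock_tag);
} sock_tag_map SEC(".maps");

SEC("cgroup/sock_create")
int tag_new_socket(struct bpf_sock *ctx)
{
	struct sock_tag *tag;

	/* gets or creates this socket's element in sock_tag_map */
	tag = bpf_sk_storage_get(&sock_tag_map, ctx, NULL,
				 BPF_SK_STORAGE_GET_F_CREATE);
	if (tag)
		tag->created_ns = bpf_ktime_get_ns();

	return 1;	/* allow the socket */
}

char _license[] SEC("license") = "GPL";
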
static bool bpf_sk_storage_tracing_allowed(const struct bpf_prog *prog)
{
	const struct btf *btf_vmlinux;
	const struct btf_type *t;
	const char *tname;
	u32 btf_id;

	if (prog->aux->dst_prog)
		return false;

	/* Ensure the tracing program is not tracing
	 * any bpf_sk_storage*() function while also
	 * using the bpf_sk_storage_(get|delete) helpers.
	 */
	switch (prog->expected_attach_type) {
	case BPF_TRACE_ITER:
	case BPF_TRACE_RAW_TP:
		/* bpf_sk_storage has no trace point */
		return true;
	case BPF_TRACE_FENTRY:
	case BPF_TRACE_FEXIT:
		btf_vmlinux = bpf_get_btf_vmlinux();
		if (IS_ERR_OR_NULL(btf_vmlinux))
			return false;
		btf_id = prog->aux->attach_btf_id;
		t = btf_type_by_id(btf_vmlinux, btf_id);
		tname = btf_name_by_offset(btf_vmlinux, t->name_off);
		return !!strncmp(tname, "bpf_sk_storage",
				 strlen("bpf_sk_storage"));
	default:
		return false;
	}

	return false;
}

/* *gfp_flags* is a hidden argument provided by the verifier */
BPF_CALL_5(bpf_sk_storage_get_tracing, struct bpf_map *, map, struct sock *, sk,
	   void *, value, u64, flags, gfp_t, gfp_flags)
{
	WARN_ON_ONCE(!bpf_rcu_lock_held());
	if (in_hardirq() || in_nmi())
		return (unsigned long)NULL;

	return (unsigned long)____bpf_sk_storage_get(map, sk, value, flags,
						     gfp_flags);
}

BPF_CALL_2(bpf_sk_storage_delete_tracing, struct bpf_map *, map,
	   struct sock *, sk)
{
	WARN_ON_ONCE(!bpf_rcu_lock_held());
	if (in_hardirq() || in_nmi())
		return -EPERM;

	return ____bpf_sk_storage_delete(map, sk);
}

const struct bpf_func_proto bpf_sk_storage_get_tracing_proto = {
	.func		= bpf_sk_storage_get_tracing,
	.gpl_only	= false,
	.ret_type	= RET_PTR_TO_MAP_VALUE_OR_NULL,
	.arg1_type	= ARG_CONST_MAP_PTR,
	.arg2_type	= ARG_PTR_TO_BTF_ID_OR_NULL,
	.arg2_btf_id	= &btf_sock_ids[BTF_SOCK_TYPE_SOCK_COMMON],
	.arg3_type	= ARG_PTR_TO_MAP_VALUE_OR_NULL,
	.arg4_type	= ARG_ANYTHING,
	.allowed	= bpf_sk_storage_tracing_allowed,
};

const struct bpf_func_proto bpf_sk_storage_delete_tracing_proto = {
	.func		= bpf_sk_storage_delete_tracing,
	.gpl_only	= false,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_CONST_MAP_PTR,
	.arg2_type	= ARG_PTR_TO_BTF_ID_OR_NULL,
	.arg2_btf_id	= &btf_sock_ids[BTF_SOCK_TYPE_SOCK_COMMON],
	.allowed	= bpf_sk_storage_tracing_allowed,
};

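/* A BPF-side sketch (not part of this kernel file): the *_tracing protos
 * above expose the same helpers to fentry/fexit/iter/raw_tp programs, with
 * bpf_sk_storage_tracing_allowed() rejecting programs that attach to a
 * bpf_sk_storage*() function themselves.  An fentry program on tcp_close()
 * (an arbitrary example target that takes a struct sock *) could read the
 * storage; the map and value names are made up, mirroring the earlier
 * sketch.
 */
#include "vmlinux.h"
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>

struct sock_tag {
	__u64 created_ns;
};

struct {
	__uint(type, BPF_MAP_TYPE_SK_STORAGE);
	__uint(map_flags, BPF_F_NO_PREALLOC);
	__type(key, int);
	__type(value, struct sock_tag);
} sock_tag_map SEC(".maps");

SEC("fentry/tcp_close")
int BPF_PROG(trace_tcp_close, struct sock *sk, long timeout)
{
	struct sock_tag *tag;

	/* read-only lookup: no BPF_SK_STORAGE_GET_F_CREATE here */
	tag = bpf_sk_storage_get(&sock_tag_map, sk, NULL, 0);
	if (tag)
		bpf_printk("closing sk created at %llu", tag->created_ns);

	return 0;
}

char LICENSE[] SEC("license") = "GPL";
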
struct bpf_sk_storage_diag {
	u32 nr_maps;
	struct bpf_map *maps[];
};

/* The reply will be like:
 * INET_DIAG_BPF_SK_STORAGES (nla_nest)
 *	SK_DIAG_BPF_STORAGE (nla_nest)
 *		SK_DIAG_BPF_STORAGE_MAP_ID (nla_put_u32)
 *		SK_DIAG_BPF_STORAGE_MAP_VALUE (nla_reserve_64bit)
 *	SK_DIAG_BPF_STORAGE (nla_nest)
 *		SK_DIAG_BPF_STORAGE_MAP_ID (nla_put_u32)
 *		SK_DIAG_BPF_STORAGE_MAP_VALUE (nla_reserve_64bit)
 *	....
 */
static int nla_value_size(u32 value_size)
{
	/* SK_DIAG_BPF_STORAGE (nla_nest)
	 *	SK_DIAG_BPF_STORAGE_MAP_ID (nla_put_u32)
	 *	SK_DIAG_BPF_STORAGE_MAP_VALUE (nla_reserve_64bit)
	 */
	return nla_total_size(0) + nla_total_size(sizeof(u32)) +
		nla_total_size_64bit(value_size);
}

void bpf_sk_storage_diag_free(struct bpf_sk_storage_diag *diag)
{
	u32 i;

	if (!diag)
		return;

	for (i = 0; i < diag->nr_maps; i++)
		bpf_map_put(diag->maps[i]);

	kfree(diag);
}
EXPORT_SYMBOL_GPL(bpf_sk_storage_diag_free);

static bool diag_check_dup(const struct bpf_sk_storage_diag *diag,
			   const struct bpf_map *map)
{
	u32 i;

	for (i = 0; i < diag->nr_maps; i++) {
		if (diag->maps[i] == map)
			return true;
	}

	return false;
}

struct bpf_sk_storage_diag *
bpf_sk_storage_diag_alloc(const struct nlattr *nla_stgs)
{
	struct bpf_sk_storage_diag *diag;
	struct nlattr *nla;
	u32 nr_maps = 0;
	int rem, err;

	/* bpf_local_storage_map is currently limited to CAP_SYS_ADMIN,
	 * matching the check done on the map_alloc_check() side.
	 */
	if (!bpf_capable())
		return ERR_PTR(-EPERM);

	nla_for_each_nested_type(nla, SK_DIAG_BPF_STORAGE_REQ_MAP_FD,
				 nla_stgs, rem) {
		if (nla_len(nla) != sizeof(u32))
			return ERR_PTR(-EINVAL);
		nr_maps++;
	}

	diag = kzalloc(struct_size(diag, maps, nr_maps), GFP_KERNEL);
	if (!diag)
		return ERR_PTR(-ENOMEM);

	nla_for_each_nested_type(nla, SK_DIAG_BPF_STORAGE_REQ_MAP_FD,
				 nla_stgs, rem) {
		int map_fd = nla_get_u32(nla);
		struct bpf_map *map = bpf_map_get(map_fd);

		if (IS_ERR(map)) {
			err = PTR_ERR(map);
			goto err_free;
		}
		if (map->map_type != BPF_MAP_TYPE_SK_STORAGE) {
			bpf_map_put(map);
			err = -EINVAL;
			goto err_free;
		}
		if (diag_check_dup(diag, map)) {
			bpf_map_put(map);
			err = -EEXIST;
			goto err_free;
		}
		diag->maps[diag->nr_maps++] = map;
	}

	return diag;

err_free:
	bpf_sk_storage_diag_free(diag);
	return ERR_PTR(err);
}
EXPORT_SYMBOL_GPL(bpf_sk_storage_diag_alloc);

static int diag_get(struct bpf_local_storage_data *sdata, struct sk_buff *skb)
{
	struct nlattr *nla_stg, *nla_value;
	struct bpf_local_storage_map *smap;

	/* It cannot exceed max nlattr's payload */
	BUILD_BUG_ON(U16_MAX - NLA_HDRLEN < BPF_LOCAL_STORAGE_MAX_VALUE_SIZE);

	nla_stg = nla_nest_start(skb, SK_DIAG_BPF_STORAGE);
	if (!nla_stg)
		return -EMSGSIZE;

	smap = rcu_dereference(sdata->smap);
	if (nla_put_u32(skb, SK_DIAG_BPF_STORAGE_MAP_ID, smap->map.id))
		goto errout;

	nla_value = nla_reserve_64bit(skb, SK_DIAG_BPF_STORAGE_MAP_VALUE,
				      smap->map.value_size,
				      SK_DIAG_BPF_STORAGE_PAD);
	if (!nla_value)
		goto errout;

	if (btf_record_has_field(smap->map.record, BPF_SPIN_LOCK))
		copy_map_value_locked(&smap->map, nla_data(nla_value),
				      sdata->data, true);
	else
		copy_map_value(&smap->map, nla_data(nla_value), sdata->data);

	nla_nest_end(skb, nla_stg);
	return 0;

errout:
	nla_nest_cancel(skb, nla_stg);
	return -EMSGSIZE;
}

static int bpf_sk_storage_diag_put_all(struct sock *sk, struct sk_buff *skb,
				       int stg_array_type,
				       unsigned int *res_diag_size)
{
	/* stg_array_type (e.g. INET_DIAG_BPF_SK_STORAGES) */
	unsigned int diag_size = nla_total_size(0);
	struct bpf_local_storage *sk_storage;
	struct bpf_local_storage_elem *selem;
	struct bpf_local_storage_map *smap;
	struct nlattr *nla_stgs;
	unsigned int saved_len;
	int err = 0;

	rcu_read_lock();

	sk_storage = rcu_dereference(sk->sk_bpf_storage);
	if (!sk_storage || hlist_empty(&sk_storage->list)) {
		rcu_read_unlock();
		return 0;
	}

	nla_stgs = nla_nest_start(skb, stg_array_type);
	if (!nla_stgs)
		/* Continue to learn diag_size */
		err = -EMSGSIZE;

	saved_len = skb->len;
	hlist_for_each_entry_rcu(selem, &sk_storage->list, snode) {
		smap = rcu_dereference(SDATA(selem)->smap);
		diag_size += nla_value_size(smap->map.value_size);

		if (nla_stgs && diag_get(SDATA(selem), skb))
			/* Continue to learn diag_size */
			err = -EMSGSIZE;
	}

	rcu_read_unlock();

	if (nla_stgs) {
		if (saved_len == skb->len)
			nla_nest_cancel(skb, nla_stgs);
		else
			nla_nest_end(skb, nla_stgs);
	}

	if (diag_size == nla_total_size(0)) {
		*res_diag_size = 0;
		return 0;
	}

	*res_diag_size = diag_size;
	return err;
}

int bpf_sk_storage_diag_put(struct bpf_sk_storage_diag *diag,
			    struct sock *sk, struct sk_buff *skb,
			    int stg_array_type,
			    unsigned int *res_diag_size)
{
	/* stg_array_type (e.g. INET_DIAG_BPF_SK_STORAGES) */
	unsigned int diag_size = nla_total_size(0);
	struct bpf_local_storage *sk_storage;
	struct bpf_local_storage_data *sdata;
	struct nlattr *nla_stgs;
	unsigned int saved_len;
	int err = 0;
	u32 i;

	*res_diag_size = 0;

	/* No map has been specified.  Dump all. */
	if (!diag->nr_maps)
		return bpf_sk_storage_diag_put_all(sk, skb, stg_array_type,
						   res_diag_size);

	rcu_read_lock();
	sk_storage = rcu_dereference(sk->sk_bpf_storage);
	if (!sk_storage || hlist_empty(&sk_storage->list)) {
		rcu_read_unlock();
		return 0;
	}

	nla_stgs = nla_nest_start(skb, stg_array_type);
	if (!nla_stgs)
		/* Continue to learn diag_size */
		err = -EMSGSIZE;

	saved_len = skb->len;
	for (i = 0; i < diag->nr_maps; i++) {
		sdata = bpf_local_storage_lookup(sk_storage,
				(struct bpf_local_storage_map *)diag->maps[i],
				false);

		if (!sdata)
			continue;

		diag_size += nla_value_size(diag->maps[i]->value_size);

		if (nla_stgs && diag_get(sdata, skb))
			/* Continue to learn diag_size */
			err = -EMSGSIZE;
	}
	rcu_read_unlock();

	if (nla_stgs) {
		if (saved_len == skb->len)
			nla_nest_cancel(skb, nla_stgs);
		else
			nla_nest_end(skb, nla_stgs);
	}

	if (diag_size == nla_total_size(0)) {
		*res_diag_size = 0;
		return 0;
	}

	*res_diag_size = diag_size;
	return err;
}
EXPORT_SYMBOL_GPL(bpf_sk_storage_diag_put);

struct bpf_iter_seq_sk_storage_map_info {
	struct bpf_map *map;
	unsigned int bucket_id;
	unsigned skip_elems;
};

static struct bpf_local_storage_elem *
bpf_sk_storage_map_seq_find_next(struct bpf_iter_seq_sk_storage_map_info *info,
				 struct bpf_local_storage_elem *prev_selem)
	__acquires(RCU) __releases(RCU)
{
	struct bpf_local_storage *sk_storage;
	struct bpf_local_storage_elem *selem;
	u32 skip_elems = info->skip_elems;
	struct bpf_local_storage_map *smap;
	u32 bucket_id = info->bucket_id;
	u32 i, count, n_buckets;
	struct bpf_local_storage_map_bucket *b;

	smap = (struct bpf_local_storage_map *)info->map;
	n_buckets = 1U << smap->bucket_log;
	if (bucket_id >= n_buckets)
		return NULL;

	/* try to find next selem in the same bucket */
	selem = prev_selem;
	count = 0;
	while (selem) {
		selem = hlist_entry_safe(rcu_dereference(hlist_next_rcu(&selem->map_node)),
					 struct bpf_local_storage_elem, map_node);
		if (!selem) {
			/* not found, unlock and go to the next bucket */
			b = &smap->buckets[bucket_id++];
			rcu_read_unlock();
			skip_elems = 0;
			break;
		}
		sk_storage = rcu_dereference(selem->local_storage);
		if (sk_storage) {
			info->skip_elems = skip_elems + count;
			return selem;
		}
		count++;
	}

	for (i = bucket_id; i < (1U << smap->bucket_log); i++) {
		b = &smap->buckets[i];
		rcu_read_lock();
		count = 0;
		hlist_for_each_entry_rcu(selem, &b->list, map_node) {
			sk_storage = rcu_dereference(selem->local_storage);
			if (sk_storage && count >= skip_elems) {
				info->bucket_id = i;
				info->skip_elems = count;
				return selem;
			}
			count++;
		}
		rcu_read_unlock();
		skip_elems = 0;
	}

	info->bucket_id = i;
	info->skip_elems = 0;
	return NULL;
}

static void *bpf_sk_storage_map_seq_start(struct seq_file *seq, loff_t *pos)
{
	struct bpf_local_storage_elem *selem;

	selem = bpf_sk_storage_map_seq_find_next(seq->private, NULL);
	if (!selem)
		return NULL;

	if (*pos == 0)
		++*pos;
	return selem;
}

static void *bpf_sk_storage_map_seq_next(struct seq_file *seq, void *v,
					 loff_t *pos)
{
	struct bpf_iter_seq_sk_storage_map_info *info = seq->private;

	++*pos;
	++info->skip_elems;
	return bpf_sk_storage_map_seq_find_next(seq->private, v);
}

struct bpf_iter__bpf_sk_storage_map {
	__bpf_md_ptr(struct bpf_iter_meta *, meta);
	__bpf_md_ptr(struct bpf_map *, map);
	__bpf_md_ptr(struct sock *, sk);
	__bpf_md_ptr(void *, value);
};

DEFINE_BPF_ITER_FUNC(bpf_sk_storage_map, struct bpf_iter_meta *meta,
		     struct bpf_map *map, struct sock *sk,
		     void *value)

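/* A BPF-side sketch (not part of this kernel file): a program attached to
 * the "bpf_sk_storage_map" iterator target registered below receives the
 * context struct defined above (meta, map, sk, value) once per stored
 * element, plus a final call with sk == NULL.  The program name is made up,
 * and the map value is assumed to start with a __u64 counter.
 */
#include "vmlinux.h"
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>

SEC("iter/bpf_sk_storage_map")
int dump_sk_storage(struct bpf_iter__bpf_sk_storage_map *ctx)
{
	struct seq_file *seq = ctx->meta->seq;
	struct sock *sk = ctx->sk;
	__u64 *val = ctx->value;
	__u16 lport;

	if (!sk || !val)
		return 0;

	/* one line per socket that has an element in this map */
	lport = sk->__sk_common.skc_num;
	BPF_SEQ_PRINTF(seq, "lport=%u counter=%llu\n", lport, *val);
	return 0;
}

char LICENSE[] SEC("license") = "GPL";
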
static int __bpf_sk_storage_map_seq_show(struct seq_file *seq,
					 struct bpf_local_storage_elem *selem)
{
	struct bpf_iter_seq_sk_storage_map_info *info = seq->private;
	struct bpf_iter__bpf_sk_storage_map ctx = {};
	struct bpf_local_storage *sk_storage;
	struct bpf_iter_meta meta;
	struct bpf_prog *prog;
	int ret = 0;

	meta.seq = seq;
	prog = bpf_iter_get_info(&meta, selem == NULL);
	if (prog) {
		ctx.meta = &meta;
		ctx.map = info->map;
		if (selem) {
			sk_storage = rcu_dereference(selem->local_storage);
			ctx.sk = sk_storage->owner;
			ctx.value = SDATA(selem)->data;
		}
		ret = bpf_iter_run_prog(prog, &ctx);
	}

	return ret;
}

static int bpf_sk_storage_map_seq_show(struct seq_file *seq, void *v)
{
	return __bpf_sk_storage_map_seq_show(seq, v);
}

static void bpf_sk_storage_map_seq_stop(struct seq_file *seq, void *v)
	__releases(RCU)
{
	if (!v)
		(void)__bpf_sk_storage_map_seq_show(seq, v);
	else
		rcu_read_unlock();
}

static int bpf_iter_init_sk_storage_map(void *priv_data,
					struct bpf_iter_aux_info *aux)
{
	struct bpf_iter_seq_sk_storage_map_info *seq_info = priv_data;

	bpf_map_inc_with_uref(aux->map);
	seq_info->map = aux->map;
	return 0;
}

static void bpf_iter_fini_sk_storage_map(void *priv_data)
{
	struct bpf_iter_seq_sk_storage_map_info *seq_info = priv_data;

	bpf_map_put_with_uref(seq_info->map);
}

static int bpf_iter_attach_map(struct bpf_prog *prog,
			       union bpf_iter_link_info *linfo,
			       struct bpf_iter_aux_info *aux)
{
	struct bpf_map *map;
	int err = -EINVAL;

	if (!linfo->map.map_fd)
		return -EBADF;

	map = bpf_map_get_with_uref(linfo->map.map_fd);
	if (IS_ERR(map))
		return PTR_ERR(map);

	if (map->map_type != BPF_MAP_TYPE_SK_STORAGE)
		goto put_map;

	if (prog->aux->max_rdwr_access > map->value_size) {
		err = -EACCES;
		goto put_map;
	}

	aux->map = map;
	return 0;

put_map:
	bpf_map_put_with_uref(map);
	return err;
}

static void bpf_iter_detach_map(struct bpf_iter_aux_info *aux)
{
	bpf_map_put_with_uref(aux->map);
}

static const struct seq_operations bpf_sk_storage_map_seq_ops = {
	.start  = bpf_sk_storage_map_seq_start,
	.next   = bpf_sk_storage_map_seq_next,
	.stop   = bpf_sk_storage_map_seq_stop,
	.show   = bpf_sk_storage_map_seq_show,
};

static const struct bpf_iter_seq_info iter_seq_info = {
	.seq_ops		= &bpf_sk_storage_map_seq_ops,
	.init_seq_private	= bpf_iter_init_sk_storage_map,
	.fini_seq_private	= bpf_iter_fini_sk_storage_map,
	.seq_priv_size		= sizeof(struct bpf_iter_seq_sk_storage_map_info),
};

static struct bpf_iter_reg bpf_sk_storage_map_reg_info = {
	.target			= "bpf_sk_storage_map",
	.attach_target		= bpf_iter_attach_map,
	.detach_target		= bpf_iter_detach_map,
	.show_fdinfo		= bpf_iter_map_show_fdinfo,
	.fill_link_info		= bpf_iter_map_fill_link_info,
	.ctx_arg_info_size	= 2,
	.ctx_arg_info		= {
		{ offsetof(struct bpf_iter__bpf_sk_storage_map, sk),
		  PTR_TO_BTF_ID_OR_NULL },
		{ offsetof(struct bpf_iter__bpf_sk_storage_map, value),
		  PTR_TO_BUF | PTR_MAYBE_NULL },
	},
	.seq_info		= &iter_seq_info,
};

static int __init bpf_sk_storage_map_iter_init(void)
{
	bpf_sk_storage_map_reg_info.ctx_arg_info[0].btf_id =
		btf_sock_ids[BTF_SOCK_TYPE_SOCK];
	return bpf_iter_reg_target(&bpf_sk_storage_map_iter_init ? &bpf_sk_storage_map_reg_info : &bpf_sk_storage_map_reg_info);
}
late_initcall(bpf_sk_storage_map_iter_init);
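
/* A user-space sketch (not part of this kernel file): once a program for the
 * "bpf_sk_storage_map" iterator target is loaded (e.g. the sketch after
 * DEFINE_BPF_ITER_FUNC above), it is bound to one specific sk_storage map
 * through bpf_iter_link_info, and the dump is consumed by read()ing the fd
 * returned by bpf_iter_create().  "prog" and "map_fd" are assumed to come
 * from an already loaded BPF object.
 */
#include <stdio.h>
#include <unistd.h>
#include <bpf/bpf.h>
#include <bpf/libbpf.h>

static int dump_sk_storage_map(struct bpf_program *prog, int map_fd)
{
	DECLARE_LIBBPF_OPTS(bpf_iter_attach_opts, opts);
	union bpf_iter_link_info linfo = {};
	struct bpf_link *link;
	char buf[4096];
	int iter_fd, n;

	linfo.map.map_fd = map_fd;	/* checked by bpf_iter_attach_map() */
	opts.link_info = &linfo;
	opts.link_info_len = sizeof(linfo);

	link = bpf_program__attach_iter(prog, &opts);
	if (!link)
		return -1;

	iter_fd = bpf_iter_create(bpf_link__fd(link));
	if (iter_fd < 0) {
		bpf_link__destroy(link);
		return -1;
	}

	/* each read() walks the map buckets via the seq_ops defined above */
	while ((n = read(iter_fd, buf, sizeof(buf) - 1)) > 0) {
		buf[n] = '\0';
		fputs(buf, stdout);
	}

	close(iter_fd);
	bpf_link__destroy(link);
	return 0;
}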