// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2019 Facebook  */
#include <linux/rculist.h>
#include <linux/list.h>
#include <linux/hash.h>
#include <linux/types.h>
#include <linux/spinlock.h>
#include <linux/bpf.h>
#include <linux/btf_ids.h>
#include <linux/bpf_local_storage.h>
#include <net/sock.h>
#include <uapi/linux/sock_diag.h>
#include <uapi/linux/btf.h>

#define BPF_LOCAL_STORAGE_CREATE_FLAG_MASK (BPF_F_NO_PREALLOC | BPF_F_CLONE)

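/* Map a selem to one of smap's buckets by hashing the selem pointer. */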
static struct bpf_local_storage_map_bucket *
select_bucket(struct bpf_local_storage_map *smap,
	      struct bpf_local_storage_elem *selem)
{
	return &smap->buckets[hash_ptr(selem, smap->bucket_log)];
}

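/* Charge/uncharge the storage memory against the owner.  The accounting is
 * delegated to the map type's (un)charge callbacks (e.g. sk omem accounting
 * for sk storage); a map without a charge callback charges nothing.
 */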
static int mem_charge(struct bpf_local_storage_map *smap, void *owner, u32 size)
{
	struct bpf_map *map = &smap->map;

	if (!map->ops->map_local_storage_charge)
		return 0;

	return map->ops->map_local_storage_charge(smap, owner, size);
}

static void mem_uncharge(struct bpf_local_storage_map *smap, void *owner,
			 u32 size)
{
	struct bpf_map *map = &smap->map;

	if (map->ops->map_local_storage_uncharge)
		map->ops->map_local_storage_uncharge(smap, owner, size);
}

static struct bpf_local_storage __rcu **
owner_storage(struct bpf_local_storage_map *smap, void *owner)
{
	struct bpf_map *map = &smap->map;

	return map->ops->map_owner_storage_ptr(owner);
}

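/* A selem sits on two lists: the owner's local_storage->list (via snode)
 * and the map bucket's list (via map_node).  An unhashed node means the
 * selem has already been unlinked from that list.
 */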
static bool selem_linked_to_storage(const struct bpf_local_storage_elem *selem)
{
	return !hlist_unhashed(&selem->snode);
}

static bool selem_linked_to_map(const struct bpf_local_storage_elem *selem)
{
	return !hlist_unhashed(&selem->map_node);
}

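/* Allocate a zeroed selem and optionally charge the owner for it.  The
 * charge is reverted if the allocation fails.  "value", if given, is copied
 * into the selem's data area.
 */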
struct bpf_local_storage_elem *
bpf_selem_alloc(struct bpf_local_storage_map *smap, void *owner,
		void *value, bool charge_mem)
{
	struct bpf_local_storage_elem *selem;

	if (charge_mem && mem_charge(smap, owner, smap->elem_size))
		return NULL;

	selem = bpf_map_kzalloc(&smap->map, smap->elem_size,
				GFP_ATOMIC | __GFP_NOWARN);
	if (selem) {
		if (value)
			memcpy(SDATA(selem)->data, value, smap->map.value_size);
		return selem;
	}

	if (charge_mem)
		mem_uncharge(smap, owner, smap->elem_size);

	return NULL;
}

/* local_storage->lock must be held and selem->local_storage == local_storage.
 * The caller must ensure selem->smap is still valid to be
 * dereferenced for its smap->elem_size and smap->cache_idx.
 */
bool bpf_selem_unlink_storage_nolock(struct bpf_local_storage *local_storage,
				     struct bpf_local_storage_elem *selem,
				     bool uncharge_mem)
{
	struct bpf_local_storage_map *smap;
	bool free_local_storage;
	void *owner;

	smap = rcu_dereference(SDATA(selem)->smap);
	owner = local_storage->owner;

	/* All uncharging on the owner must be done first.
	 * The owner may be freed once the last selem is unlinked
	 * from local_storage.
	 */
	if (uncharge_mem)
		mem_uncharge(smap, owner, smap->elem_size);

	free_local_storage = hlist_is_singular_node(&selem->snode,
						    &local_storage->list);
	if (free_local_storage) {
		mem_uncharge(smap, owner, sizeof(struct bpf_local_storage));
		local_storage->owner = NULL;

		/* After this RCU_INIT, owner may be freed and cannot be used */
		RCU_INIT_POINTER(*owner_storage(smap, owner), NULL);

		/* local_storage is not freed now.  local_storage->lock is
		 * still held and will be released by the caller with
		 * raw_spin_unlock_irqrestore(&local_storage->lock, flags).
		 *
		 * Although the unlock will be done under
		 * rcu_read_lock(), it is more intuitive to
		 * read if kfree_rcu(local_storage, rcu) is done
		 * after that unlock.
		 *
		 * Hence, a "bool free_local_storage" is returned
		 * to the caller which then calls the kfree_rcu()
		 * after the unlock.
		 */
	}
	hlist_del_init_rcu(&selem->snode);
	if (rcu_access_pointer(local_storage->cache[smap->cache_idx]) ==
	    SDATA(selem))
		RCU_INIT_POINTER(local_storage->cache[smap->cache_idx], NULL);

	kfree_rcu(selem, rcu);

	return free_local_storage;
}

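/* Unlink a selem from its owner's local_storage under local_storage->lock
 * and, if it was the last selem, free the local_storage itself after an RCU
 * grace period.
 */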
static void __bpf_selem_unlink_storage(struct bpf_local_storage_elem *selem)
{
	struct bpf_local_storage *local_storage;
	bool free_local_storage = false;
	unsigned long flags;

	if (unlikely(!selem_linked_to_storage(selem)))
		/* selem has already been unlinked from sk */
		return;

	local_storage = rcu_dereference(selem->local_storage);
	raw_spin_lock_irqsave(&local_storage->lock, flags);
	if (likely(selem_linked_to_storage(selem)))
		free_local_storage = bpf_selem_unlink_storage_nolock(
			local_storage, selem, true);
	raw_spin_unlock_irqrestore(&local_storage->lock, flags);

	if (free_local_storage)
		kfree_rcu(local_storage, rcu);
}

void bpf_selem_link_storage_nolock(struct bpf_local_storage *local_storage,
				   struct bpf_local_storage_elem *selem)
{
	RCU_INIT_POINTER(selem->local_storage, local_storage);
	hlist_add_head_rcu(&selem->snode, &local_storage->list);
}

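/* Link/unlink a selem to/from its map bucket's list under the bucket lock.
 * The bucket is derived from the selem pointer itself (see select_bucket()).
 */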
void bpf_selem_unlink_map(struct bpf_local_storage_elem *selem)
{
	struct bpf_local_storage_map *smap;
	struct bpf_local_storage_map_bucket *b;
	unsigned long flags;

	if (unlikely(!selem_linked_to_map(selem)))
		/* selem has already been unlinked from smap */
		return;

	smap = rcu_dereference(SDATA(selem)->smap);
	b = select_bucket(smap, selem);
	raw_spin_lock_irqsave(&b->lock, flags);
	if (likely(selem_linked_to_map(selem)))
		hlist_del_init_rcu(&selem->map_node);
	raw_spin_unlock_irqrestore(&b->lock, flags);
}

void bpf_selem_link_map(struct bpf_local_storage_map *smap,
			struct bpf_local_storage_elem *selem)
{
	struct bpf_local_storage_map_bucket *b = select_bucket(smap, selem);
	unsigned long flags;

	raw_spin_lock_irqsave(&b->lock, flags);
	RCU_INIT_POINTER(SDATA(selem)->smap, smap);
	hlist_add_head_rcu(&selem->map_node, &b->list);
	raw_spin_unlock_irqrestore(&b->lock, flags);
}

void bpf_selem_unlink(struct bpf_local_storage_elem *selem)
{
	/* Always unlink from map before unlinking from local_storage
	 * because selem will be freed after it is successfully unlinked
	 * from the local_storage.
	 */
	bpf_selem_unlink_map(selem);
	__bpf_selem_unlink_storage(selem);
}

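/* Look up this smap's sdata in the owner's local_storage.  The per-storage
 * cache slot (smap->cache_idx) is tried first; on a miss the storage's selem
 * list is walked and, if cacheit_lockit is set, the result is published to
 * the cache slot under local_storage->lock.
 */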
struct bpf_local_storage_data *
bpf_local_storage_lookup(struct bpf_local_storage *local_storage,
			 struct bpf_local_storage_map *smap,
			 bool cacheit_lockit)
{
	struct bpf_local_storage_data *sdata;
	struct bpf_local_storage_elem *selem;

	/* Fast path (cache hit) */
	sdata = rcu_dereference(local_storage->cache[smap->cache_idx]);
	if (sdata && rcu_access_pointer(sdata->smap) == smap)
		return sdata;

	/* Slow path (cache miss) */
	hlist_for_each_entry_rcu(selem, &local_storage->list, snode)
		if (rcu_access_pointer(SDATA(selem)->smap) == smap)
			break;

	if (!selem)
		return NULL;

	sdata = SDATA(selem);
	if (cacheit_lockit) {
		unsigned long flags;

		/* spinlock is needed to avoid racing with the
		 * parallel delete.  Otherwise, publishing an already
		 * deleted sdata to the cache will become a use-after-free
		 * problem in the next bpf_local_storage_lookup().
		 */
		raw_spin_lock_irqsave(&local_storage->lock, flags);
		if (selem_linked_to_storage(selem))
			rcu_assign_pointer(local_storage->cache[smap->cache_idx],
					   sdata);
		raw_spin_unlock_irqrestore(&local_storage->lock, flags);
	}

	return sdata;
}

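/* Validate BPF_NOEXIST/BPF_EXIST against whether an old element exists. */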
static int check_flags(const struct bpf_local_storage_data *old_sdata,
		       u64 map_flags)
{
	if (old_sdata && (map_flags & ~BPF_F_LOCK) == BPF_NOEXIST)
		/* elem already exists */
		return -EEXIST;

	if (!old_sdata && (map_flags & ~BPF_F_LOCK) == BPF_EXIST)
		/* elem doesn't exist, cannot update it */
		return -ENOENT;

	return 0;
}

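/* Allocate the owner's bpf_local_storage with first_selem already linked to
 * it, and publish it to the owner with a cmpxchg on the owner's storage
 * pointer.  Returns -EAGAIN if another storage was published concurrently.
 */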
int bpf_local_storage_alloc(void *owner,
			    struct bpf_local_storage_map *smap,
			    struct bpf_local_storage_elem *first_selem)
{
	struct bpf_local_storage *prev_storage, *storage;
	struct bpf_local_storage **owner_storage_ptr;
	int err;

	err = mem_charge(smap, owner, sizeof(*storage));
	if (err)
		return err;

	storage = bpf_map_kzalloc(&smap->map, sizeof(*storage),
				  GFP_ATOMIC | __GFP_NOWARN);
	if (!storage) {
		err = -ENOMEM;
		goto uncharge;
	}

	INIT_HLIST_HEAD(&storage->list);
	raw_spin_lock_init(&storage->lock);
	storage->owner = owner;

	bpf_selem_link_storage_nolock(storage, first_selem);
	bpf_selem_link_map(smap, first_selem);

	owner_storage_ptr =
		(struct bpf_local_storage **)owner_storage(smap, owner);
	/* Publish storage to the owner.
	 * Instead of using any lock of the kernel object (i.e. owner),
	 * cmpxchg works with any kernel object regardless of the
	 * running context (bh, irq, etc.).
	 *
	 * From now on, the owner->storage pointer (e.g. sk->sk_bpf_storage)
	 * is protected by the storage->lock.  Hence, when freeing
	 * the owner->storage, the storage->lock must be held before
	 * setting the owner->storage ptr to NULL.
	 */
	prev_storage = cmpxchg(owner_storage_ptr, NULL, storage);
	if (unlikely(prev_storage)) {
		bpf_selem_unlink_map(first_selem);
		err = -EAGAIN;
		goto uncharge;

		/* Note that even though first_selem was linked to smap's
		 * bucket->list, first_selem can be freed immediately
		 * (instead of kfree_rcu) because
		 * bpf_local_storage_map_free() does a
		 * synchronize_rcu() before walking the bucket->list.
		 * Hence, no one is accessing selem from the
		 * bucket->list under rcu_read_lock().
		 */
	}

	return 0;

uncharge:
	kfree(storage);
	mem_uncharge(smap, owner, sizeof(*storage));
	return err;
}

/* sk cannot be going away because it is linking a new elem
 * to sk->sk_bpf_storage (i.e. sk->sk_refcnt cannot be 0).
 * Otherwise, it would become a leak (and cause other memory issues
 * during map destruction).
 */
struct bpf_local_storage_data *
bpf_local_storage_update(void *owner, struct bpf_local_storage_map *smap,
			 void *value, u64 map_flags)
{
	struct bpf_local_storage_data *old_sdata = NULL;
	struct bpf_local_storage_elem *selem;
	struct bpf_local_storage *local_storage;
	unsigned long flags;
	int err;

	/* BPF_EXIST and BPF_NOEXIST cannot both be set */
	if (unlikely((map_flags & ~BPF_F_LOCK) > BPF_EXIST) ||
	    /* BPF_F_LOCK can only be used in a value with spin_lock */
	    unlikely((map_flags & BPF_F_LOCK) &&
		     !map_value_has_spin_lock(&smap->map)))
		return ERR_PTR(-EINVAL);

	local_storage = rcu_dereference(*owner_storage(smap, owner));
	if (!local_storage || hlist_empty(&local_storage->list)) {
		/* Very first elem for the owner */
		err = check_flags(NULL, map_flags);
		if (err)
			return ERR_PTR(err);

		selem = bpf_selem_alloc(smap, owner, value, true);
		if (!selem)
			return ERR_PTR(-ENOMEM);

		err = bpf_local_storage_alloc(owner, smap, selem);
		if (err) {
			kfree(selem);
			mem_uncharge(smap, owner, smap->elem_size);
			return ERR_PTR(err);
		}

		return SDATA(selem);
	}

	if ((map_flags & BPF_F_LOCK) && !(map_flags & BPF_NOEXIST)) {
		/* Hoping to find an old_sdata to do an inline update
		 * so that it can avoid taking the local_storage->lock
		 * and changing the lists.
		 */
		old_sdata =
			bpf_local_storage_lookup(local_storage, smap, false);
		err = check_flags(old_sdata, map_flags);
		if (err)
			return ERR_PTR(err);
		if (old_sdata && selem_linked_to_storage(SELEM(old_sdata))) {
			copy_map_value_locked(&smap->map, old_sdata->data,
					      value, false);
			return old_sdata;
		}
	}

	raw_spin_lock_irqsave(&local_storage->lock, flags);

	/* Recheck local_storage->list under local_storage->lock */
	if (unlikely(hlist_empty(&local_storage->list))) {
		/* A parallel del is happening and local_storage is going
		 * away.  It has just been checked before, so this is very
		 * unlikely.  Return instead of retrying to keep things
		 * simple.
		 */
		err = -EAGAIN;
		goto unlock_err;
	}

	old_sdata = bpf_local_storage_lookup(local_storage, smap, false);
	err = check_flags(old_sdata, map_flags);
	if (err)
		goto unlock_err;

	if (old_sdata && (map_flags & BPF_F_LOCK)) {
		copy_map_value_locked(&smap->map, old_sdata->data, value,
				      false);
		selem = SELEM(old_sdata);
		goto unlock;
	}

	/* local_storage->lock is held.  Hence, we are sure
	 * we can unlink and uncharge the old_sdata successfully
	 * later.  Therefore, instead of charging the new selem now
	 * and then uncharging the old selem later (which may cause
	 * a potential but unnecessary charge failure), avoid taking
	 * a charge at all here (the "!old_sdata" check); the
	 * old_sdata will then not be uncharged later during
	 * bpf_selem_unlink_storage_nolock().
	 */
	selem = bpf_selem_alloc(smap, owner, value, !old_sdata);
	if (!selem) {
		err = -ENOMEM;
		goto unlock_err;
	}

	/* First, link the new selem to the map */
	bpf_selem_link_map(smap, selem);

	/* Second, link (and publish) the new selem to local_storage */
	bpf_selem_link_storage_nolock(local_storage, selem);

	/* Third, remove old selem, SELEM(old_sdata) */
	if (old_sdata) {
		bpf_selem_unlink_map(SELEM(old_sdata));
		bpf_selem_unlink_storage_nolock(local_storage, SELEM(old_sdata),
						false);
	}

unlock:
	raw_spin_unlock_irqrestore(&local_storage->lock, flags);
	return SDATA(selem);

unlock_err:
	raw_spin_unlock_irqrestore(&local_storage->lock, flags);
	return ERR_PTR(err);
}

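/* A map is assigned a cache_idx for its lifetime: the slot currently shared
 * by the fewest maps is picked, so cache slots in bpf_local_storage->cache
 * are spread evenly across maps.
 */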
u16 bpf_local_storage_cache_idx_get(struct bpf_local_storage_cache *cache)
{
	u64 min_usage = U64_MAX;
	u16 i, res = 0;

	spin_lock(&cache->idx_lock);

	for (i = 0; i < BPF_LOCAL_STORAGE_CACHE_SIZE; i++) {
		if (cache->idx_usage_counts[i] < min_usage) {
			min_usage = cache->idx_usage_counts[i];
			res = i;

			/* Found a free cache_idx */
			if (!min_usage)
				break;
		}
	}
	cache->idx_usage_counts[res]++;

	spin_unlock(&cache->idx_lock);

	return res;
}

void bpf_local_storage_cache_idx_free(struct bpf_local_storage_cache *cache,
				      u16 idx)
{
	spin_lock(&cache->idx_lock);
	cache->idx_usage_counts[idx]--;
	spin_unlock(&cache->idx_lock);
}

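/* Free a local storage map: unlink every remaining selem from its bucket and
 * from its owner's storage.  A synchronize_rcu() is done before (so that
 * concurrent readers such as bpf_sk_storage_clone() are finished) and after
 * (so that storage-free paths can still read smap->elem_size), as explained
 * in the comments below.
 */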
void bpf_local_storage_map_free(struct bpf_local_storage_map *smap,
				int __percpu *busy_counter)
{
	struct bpf_local_storage_elem *selem;
	struct bpf_local_storage_map_bucket *b;
	unsigned int i;

	/* Note that this map might be concurrently cloned from
	 * bpf_sk_storage_clone. Wait for any existing bpf_sk_storage_clone
	 * RCU read section to finish before proceeding. New RCU
	 * read sections should be prevented via bpf_map_inc_not_zero.
	 */
	synchronize_rcu();

	/* bpf prog and the userspace can no longer access this map
	 * now.  No new selem (of this map) can be added
	 * to the owner->storage or to the map bucket's list.
	 *
	 * The elem of this map can be cleaned up here
	 * or when the storage is freed e.g.
	 * by bpf_sk_storage_free() during __sk_destruct().
	 */
	for (i = 0; i < (1U << smap->bucket_log); i++) {
		b = &smap->buckets[i];

		rcu_read_lock();
		/* No one is adding to b->list now */
		while ((selem = hlist_entry_safe(
				rcu_dereference_raw(hlist_first_rcu(&b->list)),
				struct bpf_local_storage_elem, map_node))) {
			if (busy_counter) {
				migrate_disable();
				__this_cpu_inc(*busy_counter);
			}
			bpf_selem_unlink(selem);
			if (busy_counter) {
				__this_cpu_dec(*busy_counter);
				migrate_enable();
			}
			cond_resched_rcu();
		}
		rcu_read_unlock();
	}

	/* While freeing the storage we may still need to access the map.
	 *
	 * e.g. when bpf_sk_storage_free() has unlinked selem from the map
	 * which then made the above while((selem = ...)) loop
	 * exit immediately.
	 *
	 * However, while freeing the storage one still needs to access the
	 * smap->elem_size to do the uncharging in
	 * bpf_selem_unlink_storage_nolock().
	 *
	 * Hence, wait another rcu grace period for the storage to be freed.
	 */
	synchronize_rcu();

	kvfree(smap->buckets);
	kfree(smap);
}

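/* Sanity-check map attributes common to all local storage map types:
 * no preallocation, no max_entries, a 4-byte integer key, a bounded value
 * size, and BTF ids so the value can be dumped from userspace.
 */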
int bpf_local_storage_map_alloc_check(union bpf_attr *attr)
{
	if (attr->map_flags & ~BPF_LOCAL_STORAGE_CREATE_FLAG_MASK ||
	    !(attr->map_flags & BPF_F_NO_PREALLOC) ||
	    attr->max_entries ||
	    attr->key_size != sizeof(int) || !attr->value_size ||
	    /* Enforce BTF for userspace sk dumping */
	    !attr->btf_key_type_id || !attr->btf_value_type_id)
		return -EINVAL;

	if (!bpf_capable())
		return -EPERM;

	if (attr->value_size > BPF_LOCAL_STORAGE_MAX_VALUE_SIZE)
		return -E2BIG;

	return 0;
}

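/* Allocate the common part of a local storage map: one bucket per possible
 * CPU (rounded up to a power of two, minimum 2) and the per-element size
 * derived from the map's value_size.
 */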
struct bpf_local_storage_map *bpf_local_storage_map_alloc(union bpf_attr *attr)
{
	struct bpf_local_storage_map *smap;
	unsigned int i;
	u32 nbuckets;

	smap = kzalloc(sizeof(*smap), GFP_USER | __GFP_NOWARN | __GFP_ACCOUNT);
	if (!smap)
		return ERR_PTR(-ENOMEM);
	bpf_map_init_from_attr(&smap->map, attr);

	nbuckets = roundup_pow_of_two(num_possible_cpus());
	/* Use at least 2 buckets, select_bucket() is undefined behavior with 1 bucket */
	nbuckets = max_t(u32, 2, nbuckets);
	smap->bucket_log = ilog2(nbuckets);

	smap->buckets = kvcalloc(sizeof(*smap->buckets), nbuckets,
				 GFP_USER | __GFP_NOWARN | __GFP_ACCOUNT);
	if (!smap->buckets) {
		kfree(smap);
		return ERR_PTR(-ENOMEM);
	}

	for (i = 0; i < nbuckets; i++) {
		INIT_HLIST_HEAD(&smap->buckets[i].list);
		raw_spin_lock_init(&smap->buckets[i].lock);
	}

	smap->elem_size =
		sizeof(struct bpf_local_storage_elem) + attr->value_size;

	return smap;
}

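/* The BTF key type must be a plain 32-bit integer, matching the
 * sizeof(int) key_size enforced at map creation time.
 */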
int bpf_local_storage_map_check_btf(const struct bpf_map *map,
				    const struct btf *btf,
				    const struct btf_type *key_type,
				    const struct btf_type *value_type)
{
	u32 int_data;

	if (BTF_INFO_KIND(key_type->info) != BTF_KIND_INT)
		return -EINVAL;

	int_data = *(u32 *)(key_type + 1);
	if (BTF_INT_BITS(int_data) != 32 || BTF_INT_OFFSET(int_data))
		return -EINVAL;

	return 0;
}