Linux Audio

Check our new training course

Loading...
Note: File does not exist in v4.10.11.
  1/*
  2 * Copyright (c) 2017 Nicira, Inc.
  3 *
  4 * This program is free software; you can redistribute it and/or
  5 * modify it under the terms of version 2 of the GNU General Public
  6 * License as published by the Free Software Foundation.
  7 */
  8
  9#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
 10
 11#include <linux/if.h>
 12#include <linux/skbuff.h>
 13#include <linux/ip.h>
 14#include <linux/kernel.h>
 15#include <linux/openvswitch.h>
 16#include <linux/netlink.h>
 17#include <linux/rculist.h>
 18
 19#include <net/netlink.h>
 20#include <net/genetlink.h>
 21
 22#include "datapath.h"
 23#include "meter.h"
 24
 25#define METER_HASH_BUCKETS 1024
 26
 27static const struct nla_policy meter_policy[OVS_METER_ATTR_MAX + 1] = {
 28	[OVS_METER_ATTR_ID] = { .type = NLA_U32, },
 29	[OVS_METER_ATTR_KBPS] = { .type = NLA_FLAG },
 30	[OVS_METER_ATTR_STATS] = { .len = sizeof(struct ovs_flow_stats) },
 31	[OVS_METER_ATTR_BANDS] = { .type = NLA_NESTED },
 32	[OVS_METER_ATTR_USED] = { .type = NLA_U64 },
 33	[OVS_METER_ATTR_CLEAR] = { .type = NLA_FLAG },
 34	[OVS_METER_ATTR_MAX_METERS] = { .type = NLA_U32 },
 35	[OVS_METER_ATTR_MAX_BANDS] = { .type = NLA_U32 },
 36};
 37
 38static const struct nla_policy band_policy[OVS_BAND_ATTR_MAX + 1] = {
 39	[OVS_BAND_ATTR_TYPE] = { .type = NLA_U32, },
 40	[OVS_BAND_ATTR_RATE] = { .type = NLA_U32, },
 41	[OVS_BAND_ATTR_BURST] = { .type = NLA_U32, },
 42	[OVS_BAND_ATTR_STATS] = { .len = sizeof(struct ovs_flow_stats) },
 43};
 44
 45static void ovs_meter_free(struct dp_meter *meter)
 46{
 47	if (!meter)
 48		return;
 49
 50	kfree_rcu(meter, rcu);
 51}
 52
 53static struct hlist_head *meter_hash_bucket(const struct datapath *dp,
 54					    u32 meter_id)
 55{
 56	return &dp->meters[meter_id & (METER_HASH_BUCKETS - 1)];
 57}
 58
 59/* Call with ovs_mutex or RCU read lock. */
 60static struct dp_meter *lookup_meter(const struct datapath *dp,
 61				     u32 meter_id)
 62{
 63	struct dp_meter *meter;
 64	struct hlist_head *head;
 65
 66	head = meter_hash_bucket(dp, meter_id);
 67	hlist_for_each_entry_rcu(meter, head, dp_hash_node) {
 68		if (meter->id == meter_id)
 69			return meter;
 70	}
 71	return NULL;
 72}
 73
 74static void attach_meter(struct datapath *dp, struct dp_meter *meter)
 75{
 76	struct hlist_head *head = meter_hash_bucket(dp, meter->id);
 77
 78	hlist_add_head_rcu(&meter->dp_hash_node, head);
 79}
 80
 81static void detach_meter(struct dp_meter *meter)
 82{
 83	ASSERT_OVSL();
 84	if (meter)
 85		hlist_del_rcu(&meter->dp_hash_node);
 86}
 87
 88static struct sk_buff *
 89ovs_meter_cmd_reply_start(struct genl_info *info, u8 cmd,
 90			  struct ovs_header **ovs_reply_header)
 91{
 92	struct sk_buff *skb;
 93	struct ovs_header *ovs_header = info->userhdr;
 94
 95	skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_ATOMIC);
 96	if (!skb)
 97		return ERR_PTR(-ENOMEM);
 98
 99	*ovs_reply_header = genlmsg_put(skb, info->snd_portid,
100					info->snd_seq,
101					&dp_meter_genl_family, 0, cmd);
102	if (!*ovs_reply_header) {
103		nlmsg_free(skb);
104		return ERR_PTR(-EMSGSIZE);
105	}
106	(*ovs_reply_header)->dp_ifindex = ovs_header->dp_ifindex;
107
108	return skb;
109}
110
111static int ovs_meter_cmd_reply_stats(struct sk_buff *reply, u32 meter_id,
112				     struct dp_meter *meter)
113{
114	struct nlattr *nla;
115	struct dp_meter_band *band;
116	u16 i;
117
118	if (nla_put_u32(reply, OVS_METER_ATTR_ID, meter_id))
119		goto error;
120
121	if (!meter)
122		return 0;
123
124	if (nla_put(reply, OVS_METER_ATTR_STATS,
125		    sizeof(struct ovs_flow_stats), &meter->stats) ||
126	    nla_put_u64_64bit(reply, OVS_METER_ATTR_USED, meter->used,
127			      OVS_METER_ATTR_PAD))
128		goto error;
129
130	nla = nla_nest_start(reply, OVS_METER_ATTR_BANDS);
131	if (!nla)
132		goto error;
133
134	band = meter->bands;
135
136	for (i = 0; i < meter->n_bands; ++i, ++band) {
137		struct nlattr *band_nla;
138
139		band_nla = nla_nest_start(reply, OVS_BAND_ATTR_UNSPEC);
140		if (!band_nla || nla_put(reply, OVS_BAND_ATTR_STATS,
141					 sizeof(struct ovs_flow_stats),
142					 &band->stats))
143			goto error;
144		nla_nest_end(reply, band_nla);
145	}
146	nla_nest_end(reply, nla);
147
148	return 0;
149error:
150	return -EMSGSIZE;
151}
152
153static int ovs_meter_cmd_features(struct sk_buff *skb, struct genl_info *info)
154{
155	struct sk_buff *reply;
156	struct ovs_header *ovs_reply_header;
157	struct nlattr *nla, *band_nla;
158	int err;
159
160	reply = ovs_meter_cmd_reply_start(info, OVS_METER_CMD_FEATURES,
161					  &ovs_reply_header);
162	if (IS_ERR(reply))
163		return PTR_ERR(reply);
164
165	if (nla_put_u32(reply, OVS_METER_ATTR_MAX_METERS, U32_MAX) ||
166	    nla_put_u32(reply, OVS_METER_ATTR_MAX_BANDS, DP_MAX_BANDS))
167		goto nla_put_failure;
168
169	nla = nla_nest_start(reply, OVS_METER_ATTR_BANDS);
170	if (!nla)
171		goto nla_put_failure;
172
173	band_nla = nla_nest_start(reply, OVS_BAND_ATTR_UNSPEC);
174	if (!band_nla)
175		goto nla_put_failure;
176	/* Currently only DROP band type is supported. */
177	if (nla_put_u32(reply, OVS_BAND_ATTR_TYPE, OVS_METER_BAND_TYPE_DROP))
178		goto nla_put_failure;
179	nla_nest_end(reply, band_nla);
180	nla_nest_end(reply, nla);
181
182	genlmsg_end(reply, ovs_reply_header);
183	return genlmsg_reply(reply, info);
184
185nla_put_failure:
186	nlmsg_free(reply);
187	err = -EMSGSIZE;
188	return err;
189}
190
191static struct dp_meter *dp_meter_create(struct nlattr **a)
192{
193	struct nlattr *nla;
194	int rem;
195	u16 n_bands = 0;
196	struct dp_meter *meter;
197	struct dp_meter_band *band;
198	int err;
199
200	/* Validate attributes, count the bands. */
201	if (!a[OVS_METER_ATTR_BANDS])
202		return ERR_PTR(-EINVAL);
203
204	nla_for_each_nested(nla, a[OVS_METER_ATTR_BANDS], rem)
205		if (++n_bands > DP_MAX_BANDS)
206			return ERR_PTR(-EINVAL);
207
208	/* Allocate and set up the meter before locking anything. */
209	meter = kzalloc(n_bands * sizeof(struct dp_meter_band) +
210			sizeof(*meter), GFP_KERNEL);
211	if (!meter)
212		return ERR_PTR(-ENOMEM);
213
214	meter->used = div_u64(ktime_get_ns(), 1000 * 1000);
215	meter->kbps = a[OVS_METER_ATTR_KBPS] ? 1 : 0;
216	meter->keep_stats = !a[OVS_METER_ATTR_CLEAR];
217	spin_lock_init(&meter->lock);
218	if (meter->keep_stats && a[OVS_METER_ATTR_STATS]) {
219		meter->stats = *(struct ovs_flow_stats *)
220			nla_data(a[OVS_METER_ATTR_STATS]);
221	}
222	meter->n_bands = n_bands;
223
224	/* Set up meter bands. */
225	band = meter->bands;
226	nla_for_each_nested(nla, a[OVS_METER_ATTR_BANDS], rem) {
227		struct nlattr *attr[OVS_BAND_ATTR_MAX + 1];
228		u32 band_max_delta_t;
229
230		err = nla_parse((struct nlattr **)&attr, OVS_BAND_ATTR_MAX,
231				nla_data(nla), nla_len(nla), band_policy,
232				NULL);
233		if (err)
234			goto exit_free_meter;
235
236		if (!attr[OVS_BAND_ATTR_TYPE] ||
237		    !attr[OVS_BAND_ATTR_RATE] ||
238		    !attr[OVS_BAND_ATTR_BURST]) {
239			err = -EINVAL;
240			goto exit_free_meter;
241		}
242
243		band->type = nla_get_u32(attr[OVS_BAND_ATTR_TYPE]);
244		band->rate = nla_get_u32(attr[OVS_BAND_ATTR_RATE]);
245		if (band->rate == 0) {
246			err = -EINVAL;
247			goto exit_free_meter;
248		}
249
250		band->burst_size = nla_get_u32(attr[OVS_BAND_ATTR_BURST]);
251		/* Figure out max delta_t that is enough to fill any bucket.
252		 * Keep max_delta_t size to the bucket units:
253		 * pkts => 1/1000 packets, kilobits => bits.
254		 *
255		 * Start with a full bucket.
256		 */
257		band->bucket = (band->burst_size + band->rate) * 1000;
258		band_max_delta_t = band->bucket / band->rate;
259		if (band_max_delta_t > meter->max_delta_t)
260			meter->max_delta_t = band_max_delta_t;
261		band++;
262	}
263
264	return meter;
265
266exit_free_meter:
267	kfree(meter);
268	return ERR_PTR(err);
269}
270
271static int ovs_meter_cmd_set(struct sk_buff *skb, struct genl_info *info)
272{
273	struct nlattr **a = info->attrs;
274	struct dp_meter *meter, *old_meter;
275	struct sk_buff *reply;
276	struct ovs_header *ovs_reply_header;
277	struct ovs_header *ovs_header = info->userhdr;
278	struct datapath *dp;
279	int err;
280	u32 meter_id;
281	bool failed;
282
283	meter = dp_meter_create(a);
284	if (IS_ERR_OR_NULL(meter))
285		return PTR_ERR(meter);
286
287	reply = ovs_meter_cmd_reply_start(info, OVS_METER_CMD_SET,
288					  &ovs_reply_header);
289	if (IS_ERR(reply)) {
290		err = PTR_ERR(reply);
291		goto exit_free_meter;
292	}
293
294	ovs_lock();
295	dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
296	if (!dp) {
297		err = -ENODEV;
298		goto exit_unlock;
299	}
300
301	if (!a[OVS_METER_ATTR_ID]) {
302		err = -ENODEV;
303		goto exit_unlock;
304	}
305
306	meter_id = nla_get_u32(a[OVS_METER_ATTR_ID]);
307
308	/* Cannot fail after this. */
309	old_meter = lookup_meter(dp, meter_id);
310	detach_meter(old_meter);
311	attach_meter(dp, meter);
312	ovs_unlock();
313
314	/* Build response with the meter_id and stats from
315	 * the old meter, if any.
316	 */
317	failed = nla_put_u32(reply, OVS_METER_ATTR_ID, meter_id);
318	WARN_ON(failed);
319	if (old_meter) {
320		spin_lock_bh(&old_meter->lock);
321		if (old_meter->keep_stats) {
322			err = ovs_meter_cmd_reply_stats(reply, meter_id,
323							old_meter);
324			WARN_ON(err);
325		}
326		spin_unlock_bh(&old_meter->lock);
327		ovs_meter_free(old_meter);
328	}
329
330	genlmsg_end(reply, ovs_reply_header);
331	return genlmsg_reply(reply, info);
332
333exit_unlock:
334	ovs_unlock();
335	nlmsg_free(reply);
336exit_free_meter:
337	kfree(meter);
338	return err;
339}
340
341static int ovs_meter_cmd_get(struct sk_buff *skb, struct genl_info *info)
342{
343	struct nlattr **a = info->attrs;
344	u32 meter_id;
345	struct ovs_header *ovs_header = info->userhdr;
346	struct ovs_header *ovs_reply_header;
347	struct datapath *dp;
348	int err;
349	struct sk_buff *reply;
350	struct dp_meter *meter;
351
352	if (!a[OVS_METER_ATTR_ID])
353		return -EINVAL;
354
355	meter_id = nla_get_u32(a[OVS_METER_ATTR_ID]);
356
357	reply = ovs_meter_cmd_reply_start(info, OVS_METER_CMD_GET,
358					  &ovs_reply_header);
359	if (IS_ERR(reply))
360		return PTR_ERR(reply);
361
362	ovs_lock();
363
364	dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
365	if (!dp) {
366		err = -ENODEV;
367		goto exit_unlock;
368	}
369
370	/* Locate meter, copy stats. */
371	meter = lookup_meter(dp, meter_id);
372	if (!meter) {
373		err = -ENOENT;
374		goto exit_unlock;
375	}
376
377	spin_lock_bh(&meter->lock);
378	err = ovs_meter_cmd_reply_stats(reply, meter_id, meter);
379	spin_unlock_bh(&meter->lock);
380	if (err)
381		goto exit_unlock;
382
383	ovs_unlock();
384
385	genlmsg_end(reply, ovs_reply_header);
386	return genlmsg_reply(reply, info);
387
388exit_unlock:
389	ovs_unlock();
390	nlmsg_free(reply);
391	return err;
392}
393
394static int ovs_meter_cmd_del(struct sk_buff *skb, struct genl_info *info)
395{
396	struct nlattr **a = info->attrs;
397	u32 meter_id;
398	struct ovs_header *ovs_header = info->userhdr;
399	struct ovs_header *ovs_reply_header;
400	struct datapath *dp;
401	int err;
402	struct sk_buff *reply;
403	struct dp_meter *old_meter;
404
405	if (!a[OVS_METER_ATTR_ID])
406		return -EINVAL;
407	meter_id = nla_get_u32(a[OVS_METER_ATTR_ID]);
408
409	reply = ovs_meter_cmd_reply_start(info, OVS_METER_CMD_DEL,
410					  &ovs_reply_header);
411	if (IS_ERR(reply))
412		return PTR_ERR(reply);
413
414	ovs_lock();
415
416	dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
417	if (!dp) {
418		err = -ENODEV;
419		goto exit_unlock;
420	}
421
422	old_meter = lookup_meter(dp, meter_id);
423	if (old_meter) {
424		spin_lock_bh(&old_meter->lock);
425		err = ovs_meter_cmd_reply_stats(reply, meter_id, old_meter);
426		WARN_ON(err);
427		spin_unlock_bh(&old_meter->lock);
428		detach_meter(old_meter);
429	}
430	ovs_unlock();
431	ovs_meter_free(old_meter);
432	genlmsg_end(reply, ovs_reply_header);
433	return genlmsg_reply(reply, info);
434
435exit_unlock:
436	ovs_unlock();
437	nlmsg_free(reply);
438	return err;
439}
440
441/* Meter action execution.
442 *
443 * Return true 'meter_id' drop band is triggered. The 'skb' should be
444 * dropped by the caller'.
445 */
446bool ovs_meter_execute(struct datapath *dp, struct sk_buff *skb,
447		       struct sw_flow_key *key, u32 meter_id)
448{
449	struct dp_meter *meter;
450	struct dp_meter_band *band;
451	long long int now_ms = div_u64(ktime_get_ns(), 1000 * 1000);
452	long long int long_delta_ms;
453	u32 delta_ms;
454	u32 cost;
455	int i, band_exceeded_max = -1;
456	u32 band_exceeded_rate = 0;
457
458	meter = lookup_meter(dp, meter_id);
459	/* Do not drop the packet when there is no meter. */
460	if (!meter)
461		return false;
462
463	/* Lock the meter while using it. */
464	spin_lock(&meter->lock);
465
466	long_delta_ms = (now_ms - meter->used); /* ms */
467
468	/* Make sure delta_ms will not be too large, so that bucket will not
469	 * wrap around below.
470	 */
471	delta_ms = (long_delta_ms > (long long int)meter->max_delta_t)
472		   ? meter->max_delta_t : (u32)long_delta_ms;
473
474	/* Update meter statistics.
475	 */
476	meter->used = now_ms;
477	meter->stats.n_packets += 1;
478	meter->stats.n_bytes += skb->len;
479
480	/* Bucket rate is either in kilobits per second, or in packets per
481	 * second.  We maintain the bucket in the units of either bits or
482	 * 1/1000th of a packet, correspondingly.
483	 * Then, when rate is multiplied with milliseconds, we get the
484	 * bucket units:
485	 * msec * kbps = bits, and
486	 * msec * packets/sec = 1/1000 packets.
487	 *
488	 * 'cost' is the number of bucket units in this packet.
489	 */
490	cost = (meter->kbps) ? skb->len * 8 : 1000;
491
492	/* Update all bands and find the one hit with the highest rate. */
493	for (i = 0; i < meter->n_bands; ++i) {
494		long long int max_bucket_size;
495
496		band = &meter->bands[i];
497		max_bucket_size = (band->burst_size + band->rate) * 1000LL;
498
499		band->bucket += delta_ms * band->rate;
500		if (band->bucket > max_bucket_size)
501			band->bucket = max_bucket_size;
502
503		if (band->bucket >= cost) {
504			band->bucket -= cost;
505		} else if (band->rate > band_exceeded_rate) {
506			band_exceeded_rate = band->rate;
507			band_exceeded_max = i;
508		}
509	}
510
511	if (band_exceeded_max >= 0) {
512		/* Update band statistics. */
513		band = &meter->bands[band_exceeded_max];
514		band->stats.n_packets += 1;
515		band->stats.n_bytes += skb->len;
516
517		/* Drop band triggered, let the caller drop the 'skb'.  */
518		if (band->type == OVS_METER_BAND_TYPE_DROP) {
519			spin_unlock(&meter->lock);
520			return true;
521		}
522	}
523
524	spin_unlock(&meter->lock);
525	return false;
526}
527
528static struct genl_ops dp_meter_genl_ops[] = {
529	{ .cmd = OVS_METER_CMD_FEATURES,
530		.flags = 0,		  /* OK for unprivileged users. */
531		.policy = meter_policy,
532		.doit = ovs_meter_cmd_features
533	},
534	{ .cmd = OVS_METER_CMD_SET,
535		.flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN
536					   *  privilege.
537					   */
538		.policy = meter_policy,
539		.doit = ovs_meter_cmd_set,
540	},
541	{ .cmd = OVS_METER_CMD_GET,
542		.flags = 0,		  /* OK for unprivileged users. */
543		.policy = meter_policy,
544		.doit = ovs_meter_cmd_get,
545	},
546	{ .cmd = OVS_METER_CMD_DEL,
547		.flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN
548					   *  privilege.
549					   */
550		.policy = meter_policy,
551		.doit = ovs_meter_cmd_del
552	},
553};
554
555static const struct genl_multicast_group ovs_meter_multicast_group = {
556	.name = OVS_METER_MCGROUP,
557};
558
559struct genl_family dp_meter_genl_family __ro_after_init = {
560	.hdrsize = sizeof(struct ovs_header),
561	.name = OVS_METER_FAMILY,
562	.version = OVS_METER_VERSION,
563	.maxattr = OVS_METER_ATTR_MAX,
564	.netnsok = true,
565	.parallel_ops = true,
566	.ops = dp_meter_genl_ops,
567	.n_ops = ARRAY_SIZE(dp_meter_genl_ops),
568	.mcgrps = &ovs_meter_multicast_group,
569	.n_mcgrps = 1,
570	.module = THIS_MODULE,
571};
572
573int ovs_meters_init(struct datapath *dp)
574{
575	int i;
576
577	dp->meters = kmalloc_array(METER_HASH_BUCKETS,
578				   sizeof(struct hlist_head), GFP_KERNEL);
579
580	if (!dp->meters)
581		return -ENOMEM;
582
583	for (i = 0; i < METER_HASH_BUCKETS; i++)
584		INIT_HLIST_HEAD(&dp->meters[i]);
585
586	return 0;
587}
588
589void ovs_meters_exit(struct datapath *dp)
590{
591	int i;
592
593	for (i = 0; i < METER_HASH_BUCKETS; i++) {
594		struct hlist_head *head = &dp->meters[i];
595		struct dp_meter *meter;
596		struct hlist_node *n;
597
598		hlist_for_each_entry_safe(meter, n, head, dp_hash_node)
599			kfree(meter);
600	}
601
602	kfree(dp->meters);
603}