Loading...
1/*
2 * Copyright (c) 2007-2014 Nicira, Inc.
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of version 2 of the GNU General Public
6 * License as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope that it will be useful, but
9 * WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * General Public License for more details.
12 *
13 * You should have received a copy of the GNU General Public License
14 * along with this program; if not, write to the Free Software
15 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
16 * 02110-1301, USA
17 */
18
19#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
20
21#include <linux/init.h>
22#include <linux/module.h>
23#include <linux/if_arp.h>
24#include <linux/if_vlan.h>
25#include <linux/in.h>
26#include <linux/ip.h>
27#include <linux/jhash.h>
28#include <linux/delay.h>
29#include <linux/time.h>
30#include <linux/etherdevice.h>
31#include <linux/genetlink.h>
32#include <linux/kernel.h>
33#include <linux/kthread.h>
34#include <linux/mutex.h>
35#include <linux/percpu.h>
36#include <linux/rcupdate.h>
37#include <linux/tcp.h>
38#include <linux/udp.h>
39#include <linux/ethtool.h>
40#include <linux/wait.h>
41#include <asm/div64.h>
42#include <linux/highmem.h>
43#include <linux/netfilter_bridge.h>
44#include <linux/netfilter_ipv4.h>
45#include <linux/inetdevice.h>
46#include <linux/list.h>
47#include <linux/openvswitch.h>
48#include <linux/rculist.h>
49#include <linux/dmi.h>
50#include <net/genetlink.h>
51#include <net/net_namespace.h>
52#include <net/netns/generic.h>
53
54#include "datapath.h"
55#include "flow.h"
56#include "flow_table.h"
57#include "flow_netlink.h"
58#include "vport-internal_dev.h"
59#include "vport-netdev.h"
60
61unsigned int ovs_net_id __read_mostly;
62
63static struct genl_family dp_packet_genl_family;
64static struct genl_family dp_flow_genl_family;
65static struct genl_family dp_datapath_genl_family;
66
67static const struct nla_policy flow_policy[];
68
69static const struct genl_multicast_group ovs_dp_flow_multicast_group = {
70 .name = OVS_FLOW_MCGROUP,
71};
72
73static const struct genl_multicast_group ovs_dp_datapath_multicast_group = {
74 .name = OVS_DATAPATH_MCGROUP,
75};
76
77static const struct genl_multicast_group ovs_dp_vport_multicast_group = {
78 .name = OVS_VPORT_MCGROUP,
79};
80
81/* Check if need to build a reply message.
82 * OVS userspace sets the NLM_F_ECHO flag if it needs the reply. */
83static bool ovs_must_notify(struct genl_family *family, struct genl_info *info,
84 unsigned int group)
85{
86 return info->nlhdr->nlmsg_flags & NLM_F_ECHO ||
87 genl_has_listeners(family, genl_info_net(info), group);
88}
89
90static void ovs_notify(struct genl_family *family,
91 struct sk_buff *skb, struct genl_info *info)
92{
93 genl_notify(family, skb, info, 0, GFP_KERNEL);
94}
95
/**
 * DOC: Locking:
 *
 * All writes, e.g. writes to device state (add/remove datapath, port, set
 * operations on vports, etc.) and writes to other state (flow table
 * modifications, setting miscellaneous datapath parameters, etc.), are
 * protected by ovs_lock.
 *
 * Reads are protected by RCU.
 *
 * There are a few special cases (mostly stats) that have their own
 * synchronization, but they nest under all of the above and don't interact
 * with each other.
 *
 * The RTNL lock nests inside ovs_mutex.
 */
112
113static DEFINE_MUTEX(ovs_mutex);
114
115void ovs_lock(void)
116{
117 mutex_lock(&ovs_mutex);
118}
119
120void ovs_unlock(void)
121{
122 mutex_unlock(&ovs_mutex);
123}
124
#ifdef CONFIG_LOCKDEP
/* Report whether ovs_mutex is held, for use in lockdep assertions.
 *
 * When debug_locks is zero the answer is always 1, so assertions built
 * on this helper do not fire in that case.
 */
int lockdep_ovsl_is_held(void)
{
	if (!debug_locks)
		return 1;

	return lockdep_is_held(&ovs_mutex);
}
#endif
134
135static struct vport *new_vport(const struct vport_parms *);
136static int queue_gso_packets(struct datapath *dp, struct sk_buff *,
137 const struct sw_flow_key *,
138 const struct dp_upcall_info *,
139 uint32_t cutlen);
140static int queue_userspace_packet(struct datapath *dp, struct sk_buff *,
141 const struct sw_flow_key *,
142 const struct dp_upcall_info *,
143 uint32_t cutlen);
144
145/* Must be called with rcu_read_lock. */
146static struct datapath *get_dp_rcu(struct net *net, int dp_ifindex)
147{
148 struct net_device *dev = dev_get_by_index_rcu(net, dp_ifindex);
149
150 if (dev) {
151 struct vport *vport = ovs_internal_dev_get_vport(dev);
152 if (vport)
153 return vport->dp;
154 }
155
156 return NULL;
157}
158
/* Look up the datapath for @dp_ifindex in @net; NULL when none exists.
 *
 * The caller must hold either ovs_mutex or rcu_read_lock to keep the
 * returned dp pointer valid. A one-time warning is emitted if neither is
 * held. The RCU read section taken here only covers the lookup itself.
 */
static inline struct datapath *get_dp(struct net *net, int dp_ifindex)
{
	struct datapath *found;

	WARN_ON_ONCE(!rcu_read_lock_held() && !lockdep_ovsl_is_held());

	rcu_read_lock();
	found = get_dp_rcu(net, dp_ifindex);
	rcu_read_unlock();

	return found;
}
173
174/* Must be called with rcu_read_lock or ovs_mutex. */
175const char *ovs_dp_name(const struct datapath *dp)
176{
177 struct vport *vport = ovs_vport_ovsl_rcu(dp, OVSP_LOCAL);
178 return ovs_vport_name(vport);
179}
180
181static int get_dpifindex(const struct datapath *dp)
182{
183 struct vport *local;
184 int ifindex;
185
186 rcu_read_lock();
187
188 local = ovs_vport_rcu(dp, OVSP_LOCAL);
189 if (local)
190 ifindex = local->dev->ifindex;
191 else
192 ifindex = 0;
193
194 rcu_read_unlock();
195
196 return ifindex;
197}
198
199static void destroy_dp_rcu(struct rcu_head *rcu)
200{
201 struct datapath *dp = container_of(rcu, struct datapath, rcu);
202
203 ovs_flow_tbl_destroy(&dp->table);
204 free_percpu(dp->stats_percpu);
205 kfree(dp->ports);
206 kfree(dp);
207}
208
209static struct hlist_head *vport_hash_bucket(const struct datapath *dp,
210 u16 port_no)
211{
212 return &dp->ports[port_no & (DP_VPORT_HASH_BUCKETS - 1)];
213}
214
215/* Called with ovs_mutex or RCU read lock. */
216struct vport *ovs_lookup_vport(const struct datapath *dp, u16 port_no)
217{
218 struct vport *vport;
219 struct hlist_head *head;
220
221 head = vport_hash_bucket(dp, port_no);
222 hlist_for_each_entry_rcu(vport, head, dp_hash_node) {
223 if (vport->port_no == port_no)
224 return vport;
225 }
226 return NULL;
227}
228
229/* Called with ovs_mutex. */
230static struct vport *new_vport(const struct vport_parms *parms)
231{
232 struct vport *vport;
233
234 vport = ovs_vport_add(parms);
235 if (!IS_ERR(vport)) {
236 struct datapath *dp = parms->dp;
237 struct hlist_head *head = vport_hash_bucket(dp, vport->port_no);
238
239 hlist_add_head_rcu(&vport->dp_hash_node, head);
240 }
241 return vport;
242}
243
244void ovs_dp_detach_port(struct vport *p)
245{
246 ASSERT_OVSL();
247
248 /* First drop references to device. */
249 hlist_del_rcu(&p->dp_hash_node);
250
251 /* Then destroy it. */
252 ovs_vport_del(p);
253}
254
255/* Must be called with rcu_read_lock. */
256void ovs_dp_process_packet(struct sk_buff *skb, struct sw_flow_key *key)
257{
258 const struct vport *p = OVS_CB(skb)->input_vport;
259 struct datapath *dp = p->dp;
260 struct sw_flow *flow;
261 struct sw_flow_actions *sf_acts;
262 struct dp_stats_percpu *stats;
263 u64 *stats_counter;
264 u32 n_mask_hit;
265
266 stats = this_cpu_ptr(dp->stats_percpu);
267
268 /* Look up flow. */
269 flow = ovs_flow_tbl_lookup_stats(&dp->table, key, &n_mask_hit);
270 if (unlikely(!flow)) {
271 struct dp_upcall_info upcall;
272 int error;
273
274 memset(&upcall, 0, sizeof(upcall));
275 upcall.cmd = OVS_PACKET_CMD_MISS;
276 upcall.portid = ovs_vport_find_upcall_portid(p, skb);
277 upcall.mru = OVS_CB(skb)->mru;
278 error = ovs_dp_upcall(dp, skb, key, &upcall, 0);
279 if (unlikely(error))
280 kfree_skb(skb);
281 else
282 consume_skb(skb);
283 stats_counter = &stats->n_missed;
284 goto out;
285 }
286
287 ovs_flow_stats_update(flow, key->tp.flags, skb);
288 sf_acts = rcu_dereference(flow->sf_acts);
289 ovs_execute_actions(dp, skb, sf_acts, key);
290
291 stats_counter = &stats->n_hit;
292
293out:
294 /* Update datapath statistics. */
295 u64_stats_update_begin(&stats->syncp);
296 (*stats_counter)++;
297 stats->n_mask_hit += n_mask_hit;
298 u64_stats_update_end(&stats->syncp);
299}
300
301int ovs_dp_upcall(struct datapath *dp, struct sk_buff *skb,
302 const struct sw_flow_key *key,
303 const struct dp_upcall_info *upcall_info,
304 uint32_t cutlen)
305{
306 struct dp_stats_percpu *stats;
307 int err;
308
309 if (upcall_info->portid == 0) {
310 err = -ENOTCONN;
311 goto err;
312 }
313
314 if (!skb_is_gso(skb))
315 err = queue_userspace_packet(dp, skb, key, upcall_info, cutlen);
316 else
317 err = queue_gso_packets(dp, skb, key, upcall_info, cutlen);
318 if (err)
319 goto err;
320
321 return 0;
322
323err:
324 stats = this_cpu_ptr(dp->stats_percpu);
325
326 u64_stats_update_begin(&stats->syncp);
327 stats->n_lost++;
328 u64_stats_update_end(&stats->syncp);
329
330 return err;
331}
332
333static int queue_gso_packets(struct datapath *dp, struct sk_buff *skb,
334 const struct sw_flow_key *key,
335 const struct dp_upcall_info *upcall_info,
336 uint32_t cutlen)
337{
338 unsigned short gso_type = skb_shinfo(skb)->gso_type;
339 struct sw_flow_key later_key;
340 struct sk_buff *segs, *nskb;
341 int err;
342
343 BUILD_BUG_ON(sizeof(*OVS_CB(skb)) > SKB_SGO_CB_OFFSET);
344 segs = __skb_gso_segment(skb, NETIF_F_SG, false);
345 if (IS_ERR(segs))
346 return PTR_ERR(segs);
347 if (segs == NULL)
348 return -EINVAL;
349
350 if (gso_type & SKB_GSO_UDP) {
351 /* The initial flow key extracted by ovs_flow_key_extract()
352 * in this case is for a first fragment, so we need to
353 * properly mark later fragments.
354 */
355 later_key = *key;
356 later_key.ip.frag = OVS_FRAG_TYPE_LATER;
357 }
358
359 /* Queue all of the segments. */
360 skb = segs;
361 do {
362 if (gso_type & SKB_GSO_UDP && skb != segs)
363 key = &later_key;
364
365 err = queue_userspace_packet(dp, skb, key, upcall_info, cutlen);
366 if (err)
367 break;
368
369 } while ((skb = skb->next));
370
371 /* Free all of the segments. */
372 skb = segs;
373 do {
374 nskb = skb->next;
375 if (err)
376 kfree_skb(skb);
377 else
378 consume_skb(skb);
379 } while ((skb = nskb));
380 return err;
381}
382
383static size_t upcall_msg_size(const struct dp_upcall_info *upcall_info,
384 unsigned int hdrlen)
385{
386 size_t size = NLMSG_ALIGN(sizeof(struct ovs_header))
387 + nla_total_size(hdrlen) /* OVS_PACKET_ATTR_PACKET */
388 + nla_total_size(ovs_key_attr_size()) /* OVS_PACKET_ATTR_KEY */
389 + nla_total_size(sizeof(unsigned int)); /* OVS_PACKET_ATTR_LEN */
390
391 /* OVS_PACKET_ATTR_USERDATA */
392 if (upcall_info->userdata)
393 size += NLA_ALIGN(upcall_info->userdata->nla_len);
394
395 /* OVS_PACKET_ATTR_EGRESS_TUN_KEY */
396 if (upcall_info->egress_tun_info)
397 size += nla_total_size(ovs_tun_key_attr_size());
398
399 /* OVS_PACKET_ATTR_ACTIONS */
400 if (upcall_info->actions_len)
401 size += nla_total_size(upcall_info->actions_len);
402
403 /* OVS_PACKET_ATTR_MRU */
404 if (upcall_info->mru)
405 size += nla_total_size(sizeof(upcall_info->mru));
406
407 return size;
408}
409
410static void pad_packet(struct datapath *dp, struct sk_buff *skb)
411{
412 if (!(dp->user_features & OVS_DP_F_UNALIGNED)) {
413 size_t plen = NLA_ALIGN(skb->len) - skb->len;
414
415 if (plen > 0)
416 memset(skb_put(skb, plen), 0, plen);
417 }
418}
419
420static int queue_userspace_packet(struct datapath *dp, struct sk_buff *skb,
421 const struct sw_flow_key *key,
422 const struct dp_upcall_info *upcall_info,
423 uint32_t cutlen)
424{
425 struct ovs_header *upcall;
426 struct sk_buff *nskb = NULL;
427 struct sk_buff *user_skb = NULL; /* to be queued to userspace */
428 struct nlattr *nla;
429 size_t len;
430 unsigned int hlen;
431 int err, dp_ifindex;
432
433 dp_ifindex = get_dpifindex(dp);
434 if (!dp_ifindex)
435 return -ENODEV;
436
437 if (skb_vlan_tag_present(skb)) {
438 nskb = skb_clone(skb, GFP_ATOMIC);
439 if (!nskb)
440 return -ENOMEM;
441
442 nskb = __vlan_hwaccel_push_inside(nskb);
443 if (!nskb)
444 return -ENOMEM;
445
446 skb = nskb;
447 }
448
449 if (nla_attr_size(skb->len) > USHRT_MAX) {
450 err = -EFBIG;
451 goto out;
452 }
453
454 /* Complete checksum if needed */
455 if (skb->ip_summed == CHECKSUM_PARTIAL &&
456 (err = skb_checksum_help(skb)))
457 goto out;
458
459 /* Older versions of OVS user space enforce alignment of the last
460 * Netlink attribute to NLA_ALIGNTO which would require extensive
461 * padding logic. Only perform zerocopy if padding is not required.
462 */
463 if (dp->user_features & OVS_DP_F_UNALIGNED)
464 hlen = skb_zerocopy_headlen(skb);
465 else
466 hlen = skb->len;
467
468 len = upcall_msg_size(upcall_info, hlen - cutlen);
469 user_skb = genlmsg_new(len, GFP_ATOMIC);
470 if (!user_skb) {
471 err = -ENOMEM;
472 goto out;
473 }
474
475 upcall = genlmsg_put(user_skb, 0, 0, &dp_packet_genl_family,
476 0, upcall_info->cmd);
477 upcall->dp_ifindex = dp_ifindex;
478
479 err = ovs_nla_put_key(key, key, OVS_PACKET_ATTR_KEY, false, user_skb);
480 BUG_ON(err);
481
482 if (upcall_info->userdata)
483 __nla_put(user_skb, OVS_PACKET_ATTR_USERDATA,
484 nla_len(upcall_info->userdata),
485 nla_data(upcall_info->userdata));
486
487 if (upcall_info->egress_tun_info) {
488 nla = nla_nest_start(user_skb, OVS_PACKET_ATTR_EGRESS_TUN_KEY);
489 err = ovs_nla_put_tunnel_info(user_skb,
490 upcall_info->egress_tun_info);
491 BUG_ON(err);
492 nla_nest_end(user_skb, nla);
493 }
494
495 if (upcall_info->actions_len) {
496 nla = nla_nest_start(user_skb, OVS_PACKET_ATTR_ACTIONS);
497 err = ovs_nla_put_actions(upcall_info->actions,
498 upcall_info->actions_len,
499 user_skb);
500 if (!err)
501 nla_nest_end(user_skb, nla);
502 else
503 nla_nest_cancel(user_skb, nla);
504 }
505
506 /* Add OVS_PACKET_ATTR_MRU */
507 if (upcall_info->mru) {
508 if (nla_put_u16(user_skb, OVS_PACKET_ATTR_MRU,
509 upcall_info->mru)) {
510 err = -ENOBUFS;
511 goto out;
512 }
513 pad_packet(dp, user_skb);
514 }
515
516 /* Add OVS_PACKET_ATTR_LEN when packet is truncated */
517 if (cutlen > 0) {
518 if (nla_put_u32(user_skb, OVS_PACKET_ATTR_LEN,
519 skb->len)) {
520 err = -ENOBUFS;
521 goto out;
522 }
523 pad_packet(dp, user_skb);
524 }
525
526 /* Only reserve room for attribute header, packet data is added
527 * in skb_zerocopy() */
528 if (!(nla = nla_reserve(user_skb, OVS_PACKET_ATTR_PACKET, 0))) {
529 err = -ENOBUFS;
530 goto out;
531 }
532 nla->nla_len = nla_attr_size(skb->len - cutlen);
533
534 err = skb_zerocopy(user_skb, skb, skb->len - cutlen, hlen);
535 if (err)
536 goto out;
537
538 /* Pad OVS_PACKET_ATTR_PACKET if linear copy was performed */
539 pad_packet(dp, user_skb);
540
541 ((struct nlmsghdr *) user_skb->data)->nlmsg_len = user_skb->len;
542
543 err = genlmsg_unicast(ovs_dp_get_net(dp), user_skb, upcall_info->portid);
544 user_skb = NULL;
545out:
546 if (err)
547 skb_tx_error(skb);
548 kfree_skb(user_skb);
549 kfree_skb(nskb);
550 return err;
551}
552
553static int ovs_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info)
554{
555 struct ovs_header *ovs_header = info->userhdr;
556 struct net *net = sock_net(skb->sk);
557 struct nlattr **a = info->attrs;
558 struct sw_flow_actions *acts;
559 struct sk_buff *packet;
560 struct sw_flow *flow;
561 struct sw_flow_actions *sf_acts;
562 struct datapath *dp;
563 struct vport *input_vport;
564 u16 mru = 0;
565 int len;
566 int err;
567 bool log = !a[OVS_PACKET_ATTR_PROBE];
568
569 err = -EINVAL;
570 if (!a[OVS_PACKET_ATTR_PACKET] || !a[OVS_PACKET_ATTR_KEY] ||
571 !a[OVS_PACKET_ATTR_ACTIONS])
572 goto err;
573
574 len = nla_len(a[OVS_PACKET_ATTR_PACKET]);
575 packet = __dev_alloc_skb(NET_IP_ALIGN + len, GFP_KERNEL);
576 err = -ENOMEM;
577 if (!packet)
578 goto err;
579 skb_reserve(packet, NET_IP_ALIGN);
580
581 nla_memcpy(__skb_put(packet, len), a[OVS_PACKET_ATTR_PACKET], len);
582
583 /* Set packet's mru */
584 if (a[OVS_PACKET_ATTR_MRU]) {
585 mru = nla_get_u16(a[OVS_PACKET_ATTR_MRU]);
586 packet->ignore_df = 1;
587 }
588 OVS_CB(packet)->mru = mru;
589
590 /* Build an sw_flow for sending this packet. */
591 flow = ovs_flow_alloc();
592 err = PTR_ERR(flow);
593 if (IS_ERR(flow))
594 goto err_kfree_skb;
595
596 err = ovs_flow_key_extract_userspace(net, a[OVS_PACKET_ATTR_KEY],
597 packet, &flow->key, log);
598 if (err)
599 goto err_flow_free;
600
601 err = ovs_nla_copy_actions(net, a[OVS_PACKET_ATTR_ACTIONS],
602 &flow->key, &acts, log);
603 if (err)
604 goto err_flow_free;
605
606 rcu_assign_pointer(flow->sf_acts, acts);
607 packet->priority = flow->key.phy.priority;
608 packet->mark = flow->key.phy.skb_mark;
609
610 rcu_read_lock();
611 dp = get_dp_rcu(net, ovs_header->dp_ifindex);
612 err = -ENODEV;
613 if (!dp)
614 goto err_unlock;
615
616 input_vport = ovs_vport_rcu(dp, flow->key.phy.in_port);
617 if (!input_vport)
618 input_vport = ovs_vport_rcu(dp, OVSP_LOCAL);
619
620 if (!input_vport)
621 goto err_unlock;
622
623 packet->dev = input_vport->dev;
624 OVS_CB(packet)->input_vport = input_vport;
625 sf_acts = rcu_dereference(flow->sf_acts);
626
627 local_bh_disable();
628 err = ovs_execute_actions(dp, packet, sf_acts, &flow->key);
629 local_bh_enable();
630 rcu_read_unlock();
631
632 ovs_flow_free(flow, false);
633 return err;
634
635err_unlock:
636 rcu_read_unlock();
637err_flow_free:
638 ovs_flow_free(flow, false);
639err_kfree_skb:
640 kfree_skb(packet);
641err:
642 return err;
643}
644
645static const struct nla_policy packet_policy[OVS_PACKET_ATTR_MAX + 1] = {
646 [OVS_PACKET_ATTR_PACKET] = { .len = ETH_HLEN },
647 [OVS_PACKET_ATTR_KEY] = { .type = NLA_NESTED },
648 [OVS_PACKET_ATTR_ACTIONS] = { .type = NLA_NESTED },
649 [OVS_PACKET_ATTR_PROBE] = { .type = NLA_FLAG },
650 [OVS_PACKET_ATTR_MRU] = { .type = NLA_U16 },
651};
652
653static const struct genl_ops dp_packet_genl_ops[] = {
654 { .cmd = OVS_PACKET_CMD_EXECUTE,
655 .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
656 .policy = packet_policy,
657 .doit = ovs_packet_cmd_execute
658 }
659};
660
661static struct genl_family dp_packet_genl_family __ro_after_init = {
662 .hdrsize = sizeof(struct ovs_header),
663 .name = OVS_PACKET_FAMILY,
664 .version = OVS_PACKET_VERSION,
665 .maxattr = OVS_PACKET_ATTR_MAX,
666 .netnsok = true,
667 .parallel_ops = true,
668 .ops = dp_packet_genl_ops,
669 .n_ops = ARRAY_SIZE(dp_packet_genl_ops),
670 .module = THIS_MODULE,
671};
672
673static void get_dp_stats(const struct datapath *dp, struct ovs_dp_stats *stats,
674 struct ovs_dp_megaflow_stats *mega_stats)
675{
676 int i;
677
678 memset(mega_stats, 0, sizeof(*mega_stats));
679
680 stats->n_flows = ovs_flow_tbl_count(&dp->table);
681 mega_stats->n_masks = ovs_flow_tbl_num_masks(&dp->table);
682
683 stats->n_hit = stats->n_missed = stats->n_lost = 0;
684
685 for_each_possible_cpu(i) {
686 const struct dp_stats_percpu *percpu_stats;
687 struct dp_stats_percpu local_stats;
688 unsigned int start;
689
690 percpu_stats = per_cpu_ptr(dp->stats_percpu, i);
691
692 do {
693 start = u64_stats_fetch_begin_irq(&percpu_stats->syncp);
694 local_stats = *percpu_stats;
695 } while (u64_stats_fetch_retry_irq(&percpu_stats->syncp, start));
696
697 stats->n_hit += local_stats.n_hit;
698 stats->n_missed += local_stats.n_missed;
699 stats->n_lost += local_stats.n_lost;
700 mega_stats->n_mask_hit += local_stats.n_mask_hit;
701 }
702}
703
704static bool should_fill_key(const struct sw_flow_id *sfid, uint32_t ufid_flags)
705{
706 return ovs_identifier_is_ufid(sfid) &&
707 !(ufid_flags & OVS_UFID_F_OMIT_KEY);
708}
709
710static bool should_fill_mask(uint32_t ufid_flags)
711{
712 return !(ufid_flags & OVS_UFID_F_OMIT_MASK);
713}
714
715static bool should_fill_actions(uint32_t ufid_flags)
716{
717 return !(ufid_flags & OVS_UFID_F_OMIT_ACTIONS);
718}
719
720static size_t ovs_flow_cmd_msg_size(const struct sw_flow_actions *acts,
721 const struct sw_flow_id *sfid,
722 uint32_t ufid_flags)
723{
724 size_t len = NLMSG_ALIGN(sizeof(struct ovs_header));
725
726 /* OVS_FLOW_ATTR_UFID */
727 if (sfid && ovs_identifier_is_ufid(sfid))
728 len += nla_total_size(sfid->ufid_len);
729
730 /* OVS_FLOW_ATTR_KEY */
731 if (!sfid || should_fill_key(sfid, ufid_flags))
732 len += nla_total_size(ovs_key_attr_size());
733
734 /* OVS_FLOW_ATTR_MASK */
735 if (should_fill_mask(ufid_flags))
736 len += nla_total_size(ovs_key_attr_size());
737
738 /* OVS_FLOW_ATTR_ACTIONS */
739 if (should_fill_actions(ufid_flags))
740 len += nla_total_size(acts->orig_len);
741
742 return len
743 + nla_total_size_64bit(sizeof(struct ovs_flow_stats)) /* OVS_FLOW_ATTR_STATS */
744 + nla_total_size(1) /* OVS_FLOW_ATTR_TCP_FLAGS */
745 + nla_total_size_64bit(8); /* OVS_FLOW_ATTR_USED */
746}
747
748/* Called with ovs_mutex or RCU read lock. */
749static int ovs_flow_cmd_fill_stats(const struct sw_flow *flow,
750 struct sk_buff *skb)
751{
752 struct ovs_flow_stats stats;
753 __be16 tcp_flags;
754 unsigned long used;
755
756 ovs_flow_stats_get(flow, &stats, &used, &tcp_flags);
757
758 if (used &&
759 nla_put_u64_64bit(skb, OVS_FLOW_ATTR_USED, ovs_flow_used_time(used),
760 OVS_FLOW_ATTR_PAD))
761 return -EMSGSIZE;
762
763 if (stats.n_packets &&
764 nla_put_64bit(skb, OVS_FLOW_ATTR_STATS,
765 sizeof(struct ovs_flow_stats), &stats,
766 OVS_FLOW_ATTR_PAD))
767 return -EMSGSIZE;
768
769 if ((u8)ntohs(tcp_flags) &&
770 nla_put_u8(skb, OVS_FLOW_ATTR_TCP_FLAGS, (u8)ntohs(tcp_flags)))
771 return -EMSGSIZE;
772
773 return 0;
774}
775
776/* Called with ovs_mutex or RCU read lock. */
777static int ovs_flow_cmd_fill_actions(const struct sw_flow *flow,
778 struct sk_buff *skb, int skb_orig_len)
779{
780 struct nlattr *start;
781 int err;
782
783 /* If OVS_FLOW_ATTR_ACTIONS doesn't fit, skip dumping the actions if
784 * this is the first flow to be dumped into 'skb'. This is unusual for
785 * Netlink but individual action lists can be longer than
786 * NLMSG_GOODSIZE and thus entirely undumpable if we didn't do this.
787 * The userspace caller can always fetch the actions separately if it
788 * really wants them. (Most userspace callers in fact don't care.)
789 *
790 * This can only fail for dump operations because the skb is always
791 * properly sized for single flows.
792 */
793 start = nla_nest_start(skb, OVS_FLOW_ATTR_ACTIONS);
794 if (start) {
795 const struct sw_flow_actions *sf_acts;
796
797 sf_acts = rcu_dereference_ovsl(flow->sf_acts);
798 err = ovs_nla_put_actions(sf_acts->actions,
799 sf_acts->actions_len, skb);
800
801 if (!err)
802 nla_nest_end(skb, start);
803 else {
804 if (skb_orig_len)
805 return err;
806
807 nla_nest_cancel(skb, start);
808 }
809 } else if (skb_orig_len) {
810 return -EMSGSIZE;
811 }
812
813 return 0;
814}
815
816/* Called with ovs_mutex or RCU read lock. */
817static int ovs_flow_cmd_fill_info(const struct sw_flow *flow, int dp_ifindex,
818 struct sk_buff *skb, u32 portid,
819 u32 seq, u32 flags, u8 cmd, u32 ufid_flags)
820{
821 const int skb_orig_len = skb->len;
822 struct ovs_header *ovs_header;
823 int err;
824
825 ovs_header = genlmsg_put(skb, portid, seq, &dp_flow_genl_family,
826 flags, cmd);
827 if (!ovs_header)
828 return -EMSGSIZE;
829
830 ovs_header->dp_ifindex = dp_ifindex;
831
832 err = ovs_nla_put_identifier(flow, skb);
833 if (err)
834 goto error;
835
836 if (should_fill_key(&flow->id, ufid_flags)) {
837 err = ovs_nla_put_masked_key(flow, skb);
838 if (err)
839 goto error;
840 }
841
842 if (should_fill_mask(ufid_flags)) {
843 err = ovs_nla_put_mask(flow, skb);
844 if (err)
845 goto error;
846 }
847
848 err = ovs_flow_cmd_fill_stats(flow, skb);
849 if (err)
850 goto error;
851
852 if (should_fill_actions(ufid_flags)) {
853 err = ovs_flow_cmd_fill_actions(flow, skb, skb_orig_len);
854 if (err)
855 goto error;
856 }
857
858 genlmsg_end(skb, ovs_header);
859 return 0;
860
861error:
862 genlmsg_cancel(skb, ovs_header);
863 return err;
864}
865
866/* May not be called with RCU read lock. */
867static struct sk_buff *ovs_flow_cmd_alloc_info(const struct sw_flow_actions *acts,
868 const struct sw_flow_id *sfid,
869 struct genl_info *info,
870 bool always,
871 uint32_t ufid_flags)
872{
873 struct sk_buff *skb;
874 size_t len;
875
876 if (!always && !ovs_must_notify(&dp_flow_genl_family, info, 0))
877 return NULL;
878
879 len = ovs_flow_cmd_msg_size(acts, sfid, ufid_flags);
880 skb = genlmsg_new(len, GFP_KERNEL);
881 if (!skb)
882 return ERR_PTR(-ENOMEM);
883
884 return skb;
885}
886
887/* Called with ovs_mutex. */
888static struct sk_buff *ovs_flow_cmd_build_info(const struct sw_flow *flow,
889 int dp_ifindex,
890 struct genl_info *info, u8 cmd,
891 bool always, u32 ufid_flags)
892{
893 struct sk_buff *skb;
894 int retval;
895
896 skb = ovs_flow_cmd_alloc_info(ovsl_dereference(flow->sf_acts),
897 &flow->id, info, always, ufid_flags);
898 if (IS_ERR_OR_NULL(skb))
899 return skb;
900
901 retval = ovs_flow_cmd_fill_info(flow, dp_ifindex, skb,
902 info->snd_portid, info->snd_seq, 0,
903 cmd, ufid_flags);
904 BUG_ON(retval < 0);
905 return skb;
906}
907
908static int ovs_flow_cmd_new(struct sk_buff *skb, struct genl_info *info)
909{
910 struct net *net = sock_net(skb->sk);
911 struct nlattr **a = info->attrs;
912 struct ovs_header *ovs_header = info->userhdr;
913 struct sw_flow *flow = NULL, *new_flow;
914 struct sw_flow_mask mask;
915 struct sk_buff *reply;
916 struct datapath *dp;
917 struct sw_flow_actions *acts;
918 struct sw_flow_match match;
919 u32 ufid_flags = ovs_nla_get_ufid_flags(a[OVS_FLOW_ATTR_UFID_FLAGS]);
920 int error;
921 bool log = !a[OVS_FLOW_ATTR_PROBE];
922
923 /* Must have key and actions. */
924 error = -EINVAL;
925 if (!a[OVS_FLOW_ATTR_KEY]) {
926 OVS_NLERR(log, "Flow key attr not present in new flow.");
927 goto error;
928 }
929 if (!a[OVS_FLOW_ATTR_ACTIONS]) {
930 OVS_NLERR(log, "Flow actions attr not present in new flow.");
931 goto error;
932 }
933
934 /* Most of the time we need to allocate a new flow, do it before
935 * locking.
936 */
937 new_flow = ovs_flow_alloc();
938 if (IS_ERR(new_flow)) {
939 error = PTR_ERR(new_flow);
940 goto error;
941 }
942
943 /* Extract key. */
944 ovs_match_init(&match, &new_flow->key, false, &mask);
945 error = ovs_nla_get_match(net, &match, a[OVS_FLOW_ATTR_KEY],
946 a[OVS_FLOW_ATTR_MASK], log);
947 if (error)
948 goto err_kfree_flow;
949
950 /* Extract flow identifier. */
951 error = ovs_nla_get_identifier(&new_flow->id, a[OVS_FLOW_ATTR_UFID],
952 &new_flow->key, log);
953 if (error)
954 goto err_kfree_flow;
955
956 /* unmasked key is needed to match when ufid is not used. */
957 if (ovs_identifier_is_key(&new_flow->id))
958 match.key = new_flow->id.unmasked_key;
959
960 ovs_flow_mask_key(&new_flow->key, &new_flow->key, true, &mask);
961
962 /* Validate actions. */
963 error = ovs_nla_copy_actions(net, a[OVS_FLOW_ATTR_ACTIONS],
964 &new_flow->key, &acts, log);
965 if (error) {
966 OVS_NLERR(log, "Flow actions may not be safe on all matching packets.");
967 goto err_kfree_flow;
968 }
969
970 reply = ovs_flow_cmd_alloc_info(acts, &new_flow->id, info, false,
971 ufid_flags);
972 if (IS_ERR(reply)) {
973 error = PTR_ERR(reply);
974 goto err_kfree_acts;
975 }
976
977 ovs_lock();
978 dp = get_dp(net, ovs_header->dp_ifindex);
979 if (unlikely(!dp)) {
980 error = -ENODEV;
981 goto err_unlock_ovs;
982 }
983
984 /* Check if this is a duplicate flow */
985 if (ovs_identifier_is_ufid(&new_flow->id))
986 flow = ovs_flow_tbl_lookup_ufid(&dp->table, &new_flow->id);
987 if (!flow)
988 flow = ovs_flow_tbl_lookup(&dp->table, &new_flow->key);
989 if (likely(!flow)) {
990 rcu_assign_pointer(new_flow->sf_acts, acts);
991
992 /* Put flow in bucket. */
993 error = ovs_flow_tbl_insert(&dp->table, new_flow, &mask);
994 if (unlikely(error)) {
995 acts = NULL;
996 goto err_unlock_ovs;
997 }
998
999 if (unlikely(reply)) {
1000 error = ovs_flow_cmd_fill_info(new_flow,
1001 ovs_header->dp_ifindex,
1002 reply, info->snd_portid,
1003 info->snd_seq, 0,
1004 OVS_FLOW_CMD_NEW,
1005 ufid_flags);
1006 BUG_ON(error < 0);
1007 }
1008 ovs_unlock();
1009 } else {
1010 struct sw_flow_actions *old_acts;
1011
1012 /* Bail out if we're not allowed to modify an existing flow.
1013 * We accept NLM_F_CREATE in place of the intended NLM_F_EXCL
1014 * because Generic Netlink treats the latter as a dump
1015 * request. We also accept NLM_F_EXCL in case that bug ever
1016 * gets fixed.
1017 */
1018 if (unlikely(info->nlhdr->nlmsg_flags & (NLM_F_CREATE
1019 | NLM_F_EXCL))) {
1020 error = -EEXIST;
1021 goto err_unlock_ovs;
1022 }
1023 /* The flow identifier has to be the same for flow updates.
1024 * Look for any overlapping flow.
1025 */
1026 if (unlikely(!ovs_flow_cmp(flow, &match))) {
1027 if (ovs_identifier_is_key(&flow->id))
1028 flow = ovs_flow_tbl_lookup_exact(&dp->table,
1029 &match);
1030 else /* UFID matches but key is different */
1031 flow = NULL;
1032 if (!flow) {
1033 error = -ENOENT;
1034 goto err_unlock_ovs;
1035 }
1036 }
1037 /* Update actions. */
1038 old_acts = ovsl_dereference(flow->sf_acts);
1039 rcu_assign_pointer(flow->sf_acts, acts);
1040
1041 if (unlikely(reply)) {
1042 error = ovs_flow_cmd_fill_info(flow,
1043 ovs_header->dp_ifindex,
1044 reply, info->snd_portid,
1045 info->snd_seq, 0,
1046 OVS_FLOW_CMD_NEW,
1047 ufid_flags);
1048 BUG_ON(error < 0);
1049 }
1050 ovs_unlock();
1051
1052 ovs_nla_free_flow_actions_rcu(old_acts);
1053 ovs_flow_free(new_flow, false);
1054 }
1055
1056 if (reply)
1057 ovs_notify(&dp_flow_genl_family, reply, info);
1058 return 0;
1059
1060err_unlock_ovs:
1061 ovs_unlock();
1062 kfree_skb(reply);
1063err_kfree_acts:
1064 ovs_nla_free_flow_actions(acts);
1065err_kfree_flow:
1066 ovs_flow_free(new_flow, false);
1067error:
1068 return error;
1069}
1070
1071/* Factor out action copy to avoid "Wframe-larger-than=1024" warning. */
1072static struct sw_flow_actions *get_flow_actions(struct net *net,
1073 const struct nlattr *a,
1074 const struct sw_flow_key *key,
1075 const struct sw_flow_mask *mask,
1076 bool log)
1077{
1078 struct sw_flow_actions *acts;
1079 struct sw_flow_key masked_key;
1080 int error;
1081
1082 ovs_flow_mask_key(&masked_key, key, true, mask);
1083 error = ovs_nla_copy_actions(net, a, &masked_key, &acts, log);
1084 if (error) {
1085 OVS_NLERR(log,
1086 "Actions may not be safe on all matching packets");
1087 return ERR_PTR(error);
1088 }
1089
1090 return acts;
1091}
1092
1093static int ovs_flow_cmd_set(struct sk_buff *skb, struct genl_info *info)
1094{
1095 struct net *net = sock_net(skb->sk);
1096 struct nlattr **a = info->attrs;
1097 struct ovs_header *ovs_header = info->userhdr;
1098 struct sw_flow_key key;
1099 struct sw_flow *flow;
1100 struct sw_flow_mask mask;
1101 struct sk_buff *reply = NULL;
1102 struct datapath *dp;
1103 struct sw_flow_actions *old_acts = NULL, *acts = NULL;
1104 struct sw_flow_match match;
1105 struct sw_flow_id sfid;
1106 u32 ufid_flags = ovs_nla_get_ufid_flags(a[OVS_FLOW_ATTR_UFID_FLAGS]);
1107 int error = 0;
1108 bool log = !a[OVS_FLOW_ATTR_PROBE];
1109 bool ufid_present;
1110
1111 ufid_present = ovs_nla_get_ufid(&sfid, a[OVS_FLOW_ATTR_UFID], log);
1112 if (a[OVS_FLOW_ATTR_KEY]) {
1113 ovs_match_init(&match, &key, true, &mask);
1114 error = ovs_nla_get_match(net, &match, a[OVS_FLOW_ATTR_KEY],
1115 a[OVS_FLOW_ATTR_MASK], log);
1116 } else if (!ufid_present) {
1117 OVS_NLERR(log,
1118 "Flow set message rejected, Key attribute missing.");
1119 error = -EINVAL;
1120 }
1121 if (error)
1122 goto error;
1123
1124 /* Validate actions. */
1125 if (a[OVS_FLOW_ATTR_ACTIONS]) {
1126 if (!a[OVS_FLOW_ATTR_KEY]) {
1127 OVS_NLERR(log,
1128 "Flow key attribute not present in set flow.");
1129 error = -EINVAL;
1130 goto error;
1131 }
1132
1133 acts = get_flow_actions(net, a[OVS_FLOW_ATTR_ACTIONS], &key,
1134 &mask, log);
1135 if (IS_ERR(acts)) {
1136 error = PTR_ERR(acts);
1137 goto error;
1138 }
1139
1140 /* Can allocate before locking if have acts. */
1141 reply = ovs_flow_cmd_alloc_info(acts, &sfid, info, false,
1142 ufid_flags);
1143 if (IS_ERR(reply)) {
1144 error = PTR_ERR(reply);
1145 goto err_kfree_acts;
1146 }
1147 }
1148
1149 ovs_lock();
1150 dp = get_dp(net, ovs_header->dp_ifindex);
1151 if (unlikely(!dp)) {
1152 error = -ENODEV;
1153 goto err_unlock_ovs;
1154 }
1155 /* Check that the flow exists. */
1156 if (ufid_present)
1157 flow = ovs_flow_tbl_lookup_ufid(&dp->table, &sfid);
1158 else
1159 flow = ovs_flow_tbl_lookup_exact(&dp->table, &match);
1160 if (unlikely(!flow)) {
1161 error = -ENOENT;
1162 goto err_unlock_ovs;
1163 }
1164
1165 /* Update actions, if present. */
1166 if (likely(acts)) {
1167 old_acts = ovsl_dereference(flow->sf_acts);
1168 rcu_assign_pointer(flow->sf_acts, acts);
1169
1170 if (unlikely(reply)) {
1171 error = ovs_flow_cmd_fill_info(flow,
1172 ovs_header->dp_ifindex,
1173 reply, info->snd_portid,
1174 info->snd_seq, 0,
1175 OVS_FLOW_CMD_NEW,
1176 ufid_flags);
1177 BUG_ON(error < 0);
1178 }
1179 } else {
1180 /* Could not alloc without acts before locking. */
1181 reply = ovs_flow_cmd_build_info(flow, ovs_header->dp_ifindex,
1182 info, OVS_FLOW_CMD_NEW, false,
1183 ufid_flags);
1184
1185 if (IS_ERR(reply)) {
1186 error = PTR_ERR(reply);
1187 goto err_unlock_ovs;
1188 }
1189 }
1190
1191 /* Clear stats. */
1192 if (a[OVS_FLOW_ATTR_CLEAR])
1193 ovs_flow_stats_clear(flow);
1194 ovs_unlock();
1195
1196 if (reply)
1197 ovs_notify(&dp_flow_genl_family, reply, info);
1198 if (old_acts)
1199 ovs_nla_free_flow_actions_rcu(old_acts);
1200
1201 return 0;
1202
1203err_unlock_ovs:
1204 ovs_unlock();
1205 kfree_skb(reply);
1206err_kfree_acts:
1207 ovs_nla_free_flow_actions(acts);
1208error:
1209 return error;
1210}
1211
1212static int ovs_flow_cmd_get(struct sk_buff *skb, struct genl_info *info)
1213{
1214 struct nlattr **a = info->attrs;
1215 struct ovs_header *ovs_header = info->userhdr;
1216 struct net *net = sock_net(skb->sk);
1217 struct sw_flow_key key;
1218 struct sk_buff *reply;
1219 struct sw_flow *flow;
1220 struct datapath *dp;
1221 struct sw_flow_match match;
1222 struct sw_flow_id ufid;
1223 u32 ufid_flags = ovs_nla_get_ufid_flags(a[OVS_FLOW_ATTR_UFID_FLAGS]);
1224 int err = 0;
1225 bool log = !a[OVS_FLOW_ATTR_PROBE];
1226 bool ufid_present;
1227
1228 ufid_present = ovs_nla_get_ufid(&ufid, a[OVS_FLOW_ATTR_UFID], log);
1229 if (a[OVS_FLOW_ATTR_KEY]) {
1230 ovs_match_init(&match, &key, true, NULL);
1231 err = ovs_nla_get_match(net, &match, a[OVS_FLOW_ATTR_KEY], NULL,
1232 log);
1233 } else if (!ufid_present) {
1234 OVS_NLERR(log,
1235 "Flow get message rejected, Key attribute missing.");
1236 err = -EINVAL;
1237 }
1238 if (err)
1239 return err;
1240
1241 ovs_lock();
1242 dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
1243 if (!dp) {
1244 err = -ENODEV;
1245 goto unlock;
1246 }
1247
1248 if (ufid_present)
1249 flow = ovs_flow_tbl_lookup_ufid(&dp->table, &ufid);
1250 else
1251 flow = ovs_flow_tbl_lookup_exact(&dp->table, &match);
1252 if (!flow) {
1253 err = -ENOENT;
1254 goto unlock;
1255 }
1256
1257 reply = ovs_flow_cmd_build_info(flow, ovs_header->dp_ifindex, info,
1258 OVS_FLOW_CMD_NEW, true, ufid_flags);
1259 if (IS_ERR(reply)) {
1260 err = PTR_ERR(reply);
1261 goto unlock;
1262 }
1263
1264 ovs_unlock();
1265 return genlmsg_reply(reply, info);
1266unlock:
1267 ovs_unlock();
1268 return err;
1269}
1270
1271static int ovs_flow_cmd_del(struct sk_buff *skb, struct genl_info *info)
1272{
1273 struct nlattr **a = info->attrs;
1274 struct ovs_header *ovs_header = info->userhdr;
1275 struct net *net = sock_net(skb->sk);
1276 struct sw_flow_key key;
1277 struct sk_buff *reply;
1278 struct sw_flow *flow = NULL;
1279 struct datapath *dp;
1280 struct sw_flow_match match;
1281 struct sw_flow_id ufid;
1282 u32 ufid_flags = ovs_nla_get_ufid_flags(a[OVS_FLOW_ATTR_UFID_FLAGS]);
1283 int err;
1284 bool log = !a[OVS_FLOW_ATTR_PROBE];
1285 bool ufid_present;
1286
1287 ufid_present = ovs_nla_get_ufid(&ufid, a[OVS_FLOW_ATTR_UFID], log);
1288 if (a[OVS_FLOW_ATTR_KEY]) {
1289 ovs_match_init(&match, &key, true, NULL);
1290 err = ovs_nla_get_match(net, &match, a[OVS_FLOW_ATTR_KEY],
1291 NULL, log);
1292 if (unlikely(err))
1293 return err;
1294 }
1295
1296 ovs_lock();
1297 dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
1298 if (unlikely(!dp)) {
1299 err = -ENODEV;
1300 goto unlock;
1301 }
1302
1303 if (unlikely(!a[OVS_FLOW_ATTR_KEY] && !ufid_present)) {
1304 err = ovs_flow_tbl_flush(&dp->table);
1305 goto unlock;
1306 }
1307
1308 if (ufid_present)
1309 flow = ovs_flow_tbl_lookup_ufid(&dp->table, &ufid);
1310 else
1311 flow = ovs_flow_tbl_lookup_exact(&dp->table, &match);
1312 if (unlikely(!flow)) {
1313 err = -ENOENT;
1314 goto unlock;
1315 }
1316
1317 ovs_flow_tbl_remove(&dp->table, flow);
1318 ovs_unlock();
1319
1320 reply = ovs_flow_cmd_alloc_info((const struct sw_flow_actions __force *) flow->sf_acts,
1321 &flow->id, info, false, ufid_flags);
1322 if (likely(reply)) {
1323 if (likely(!IS_ERR(reply))) {
1324 rcu_read_lock(); /*To keep RCU checker happy. */
1325 err = ovs_flow_cmd_fill_info(flow, ovs_header->dp_ifindex,
1326 reply, info->snd_portid,
1327 info->snd_seq, 0,
1328 OVS_FLOW_CMD_DEL,
1329 ufid_flags);
1330 rcu_read_unlock();
1331 BUG_ON(err < 0);
1332
1333 ovs_notify(&dp_flow_genl_family, reply, info);
1334 } else {
1335 netlink_set_err(sock_net(skb->sk)->genl_sock, 0, 0, PTR_ERR(reply));
1336 }
1337 }
1338
1339 ovs_flow_free(flow, true);
1340 return 0;
1341unlock:
1342 ovs_unlock();
1343 return err;
1344}
1345
1346static int ovs_flow_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb)
1347{
1348 struct nlattr *a[__OVS_FLOW_ATTR_MAX];
1349 struct ovs_header *ovs_header = genlmsg_data(nlmsg_data(cb->nlh));
1350 struct table_instance *ti;
1351 struct datapath *dp;
1352 u32 ufid_flags;
1353 int err;
1354
1355 err = genlmsg_parse(cb->nlh, &dp_flow_genl_family, a,
1356 OVS_FLOW_ATTR_MAX, flow_policy);
1357 if (err)
1358 return err;
1359 ufid_flags = ovs_nla_get_ufid_flags(a[OVS_FLOW_ATTR_UFID_FLAGS]);
1360
1361 rcu_read_lock();
1362 dp = get_dp_rcu(sock_net(skb->sk), ovs_header->dp_ifindex);
1363 if (!dp) {
1364 rcu_read_unlock();
1365 return -ENODEV;
1366 }
1367
1368 ti = rcu_dereference(dp->table.ti);
1369 for (;;) {
1370 struct sw_flow *flow;
1371 u32 bucket, obj;
1372
1373 bucket = cb->args[0];
1374 obj = cb->args[1];
1375 flow = ovs_flow_tbl_dump_next(ti, &bucket, &obj);
1376 if (!flow)
1377 break;
1378
1379 if (ovs_flow_cmd_fill_info(flow, ovs_header->dp_ifindex, skb,
1380 NETLINK_CB(cb->skb).portid,
1381 cb->nlh->nlmsg_seq, NLM_F_MULTI,
1382 OVS_FLOW_CMD_NEW, ufid_flags) < 0)
1383 break;
1384
1385 cb->args[0] = bucket;
1386 cb->args[1] = obj;
1387 }
1388 rcu_read_unlock();
1389 return skb->len;
1390}
1391
1392static const struct nla_policy flow_policy[OVS_FLOW_ATTR_MAX + 1] = {
1393 [OVS_FLOW_ATTR_KEY] = { .type = NLA_NESTED },
1394 [OVS_FLOW_ATTR_MASK] = { .type = NLA_NESTED },
1395 [OVS_FLOW_ATTR_ACTIONS] = { .type = NLA_NESTED },
1396 [OVS_FLOW_ATTR_CLEAR] = { .type = NLA_FLAG },
1397 [OVS_FLOW_ATTR_PROBE] = { .type = NLA_FLAG },
1398 [OVS_FLOW_ATTR_UFID] = { .type = NLA_UNSPEC, .len = 1 },
1399 [OVS_FLOW_ATTR_UFID_FLAGS] = { .type = NLA_U32 },
1400};
1401
1402static const struct genl_ops dp_flow_genl_ops[] = {
1403 { .cmd = OVS_FLOW_CMD_NEW,
1404 .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
1405 .policy = flow_policy,
1406 .doit = ovs_flow_cmd_new
1407 },
1408 { .cmd = OVS_FLOW_CMD_DEL,
1409 .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
1410 .policy = flow_policy,
1411 .doit = ovs_flow_cmd_del
1412 },
1413 { .cmd = OVS_FLOW_CMD_GET,
1414 .flags = 0, /* OK for unprivileged users. */
1415 .policy = flow_policy,
1416 .doit = ovs_flow_cmd_get,
1417 .dumpit = ovs_flow_cmd_dump
1418 },
1419 { .cmd = OVS_FLOW_CMD_SET,
1420 .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
1421 .policy = flow_policy,
1422 .doit = ovs_flow_cmd_set,
1423 },
1424};
1425
1426static struct genl_family dp_flow_genl_family __ro_after_init = {
1427 .hdrsize = sizeof(struct ovs_header),
1428 .name = OVS_FLOW_FAMILY,
1429 .version = OVS_FLOW_VERSION,
1430 .maxattr = OVS_FLOW_ATTR_MAX,
1431 .netnsok = true,
1432 .parallel_ops = true,
1433 .ops = dp_flow_genl_ops,
1434 .n_ops = ARRAY_SIZE(dp_flow_genl_ops),
1435 .mcgrps = &ovs_dp_flow_multicast_group,
1436 .n_mcgrps = 1,
1437 .module = THIS_MODULE,
1438};
1439
1440static size_t ovs_dp_cmd_msg_size(void)
1441{
1442 size_t msgsize = NLMSG_ALIGN(sizeof(struct ovs_header));
1443
1444 msgsize += nla_total_size(IFNAMSIZ);
1445 msgsize += nla_total_size_64bit(sizeof(struct ovs_dp_stats));
1446 msgsize += nla_total_size_64bit(sizeof(struct ovs_dp_megaflow_stats));
1447 msgsize += nla_total_size(sizeof(u32)); /* OVS_DP_ATTR_USER_FEATURES */
1448
1449 return msgsize;
1450}
1451
1452/* Called with ovs_mutex. */
1453static int ovs_dp_cmd_fill_info(struct datapath *dp, struct sk_buff *skb,
1454 u32 portid, u32 seq, u32 flags, u8 cmd)
1455{
1456 struct ovs_header *ovs_header;
1457 struct ovs_dp_stats dp_stats;
1458 struct ovs_dp_megaflow_stats dp_megaflow_stats;
1459 int err;
1460
1461 ovs_header = genlmsg_put(skb, portid, seq, &dp_datapath_genl_family,
1462 flags, cmd);
1463 if (!ovs_header)
1464 goto error;
1465
1466 ovs_header->dp_ifindex = get_dpifindex(dp);
1467
1468 err = nla_put_string(skb, OVS_DP_ATTR_NAME, ovs_dp_name(dp));
1469 if (err)
1470 goto nla_put_failure;
1471
1472 get_dp_stats(dp, &dp_stats, &dp_megaflow_stats);
1473 if (nla_put_64bit(skb, OVS_DP_ATTR_STATS, sizeof(struct ovs_dp_stats),
1474 &dp_stats, OVS_DP_ATTR_PAD))
1475 goto nla_put_failure;
1476
1477 if (nla_put_64bit(skb, OVS_DP_ATTR_MEGAFLOW_STATS,
1478 sizeof(struct ovs_dp_megaflow_stats),
1479 &dp_megaflow_stats, OVS_DP_ATTR_PAD))
1480 goto nla_put_failure;
1481
1482 if (nla_put_u32(skb, OVS_DP_ATTR_USER_FEATURES, dp->user_features))
1483 goto nla_put_failure;
1484
1485 genlmsg_end(skb, ovs_header);
1486 return 0;
1487
1488nla_put_failure:
1489 genlmsg_cancel(skb, ovs_header);
1490error:
1491 return -EMSGSIZE;
1492}
1493
1494static struct sk_buff *ovs_dp_cmd_alloc_info(void)
1495{
1496 return genlmsg_new(ovs_dp_cmd_msg_size(), GFP_KERNEL);
1497}
1498
1499/* Called with rcu_read_lock or ovs_mutex. */
1500static struct datapath *lookup_datapath(struct net *net,
1501 const struct ovs_header *ovs_header,
1502 struct nlattr *a[OVS_DP_ATTR_MAX + 1])
1503{
1504 struct datapath *dp;
1505
1506 if (!a[OVS_DP_ATTR_NAME])
1507 dp = get_dp(net, ovs_header->dp_ifindex);
1508 else {
1509 struct vport *vport;
1510
1511 vport = ovs_vport_locate(net, nla_data(a[OVS_DP_ATTR_NAME]));
1512 dp = vport && vport->port_no == OVSP_LOCAL ? vport->dp : NULL;
1513 }
1514 return dp ? dp : ERR_PTR(-ENODEV);
1515}
1516
1517static void ovs_dp_reset_user_features(struct sk_buff *skb, struct genl_info *info)
1518{
1519 struct datapath *dp;
1520
1521 dp = lookup_datapath(sock_net(skb->sk), info->userhdr, info->attrs);
1522 if (IS_ERR(dp))
1523 return;
1524
1525 WARN(dp->user_features, "Dropping previously announced user features\n");
1526 dp->user_features = 0;
1527}
1528
1529static void ovs_dp_change(struct datapath *dp, struct nlattr *a[])
1530{
1531 if (a[OVS_DP_ATTR_USER_FEATURES])
1532 dp->user_features = nla_get_u32(a[OVS_DP_ATTR_USER_FEATURES]);
1533}
1534
1535static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info)
1536{
1537 struct nlattr **a = info->attrs;
1538 struct vport_parms parms;
1539 struct sk_buff *reply;
1540 struct datapath *dp;
1541 struct vport *vport;
1542 struct ovs_net *ovs_net;
1543 int err, i;
1544
1545 err = -EINVAL;
1546 if (!a[OVS_DP_ATTR_NAME] || !a[OVS_DP_ATTR_UPCALL_PID])
1547 goto err;
1548
1549 reply = ovs_dp_cmd_alloc_info();
1550 if (!reply)
1551 return -ENOMEM;
1552
1553 err = -ENOMEM;
1554 dp = kzalloc(sizeof(*dp), GFP_KERNEL);
1555 if (dp == NULL)
1556 goto err_free_reply;
1557
1558 ovs_dp_set_net(dp, sock_net(skb->sk));
1559
1560 /* Allocate table. */
1561 err = ovs_flow_tbl_init(&dp->table);
1562 if (err)
1563 goto err_free_dp;
1564
1565 dp->stats_percpu = netdev_alloc_pcpu_stats(struct dp_stats_percpu);
1566 if (!dp->stats_percpu) {
1567 err = -ENOMEM;
1568 goto err_destroy_table;
1569 }
1570
1571 dp->ports = kmalloc(DP_VPORT_HASH_BUCKETS * sizeof(struct hlist_head),
1572 GFP_KERNEL);
1573 if (!dp->ports) {
1574 err = -ENOMEM;
1575 goto err_destroy_percpu;
1576 }
1577
1578 for (i = 0; i < DP_VPORT_HASH_BUCKETS; i++)
1579 INIT_HLIST_HEAD(&dp->ports[i]);
1580
1581 /* Set up our datapath device. */
1582 parms.name = nla_data(a[OVS_DP_ATTR_NAME]);
1583 parms.type = OVS_VPORT_TYPE_INTERNAL;
1584 parms.options = NULL;
1585 parms.dp = dp;
1586 parms.port_no = OVSP_LOCAL;
1587 parms.upcall_portids = a[OVS_DP_ATTR_UPCALL_PID];
1588
1589 ovs_dp_change(dp, a);
1590
1591 /* So far only local changes have been made, now need the lock. */
1592 ovs_lock();
1593
1594 vport = new_vport(&parms);
1595 if (IS_ERR(vport)) {
1596 err = PTR_ERR(vport);
1597 if (err == -EBUSY)
1598 err = -EEXIST;
1599
1600 if (err == -EEXIST) {
1601 /* An outdated user space instance that does not understand
1602 * the concept of user_features has attempted to create a new
1603 * datapath and is likely to reuse it. Drop all user features.
1604 */
1605 if (info->genlhdr->version < OVS_DP_VER_FEATURES)
1606 ovs_dp_reset_user_features(skb, info);
1607 }
1608
1609 goto err_destroy_ports_array;
1610 }
1611
1612 err = ovs_dp_cmd_fill_info(dp, reply, info->snd_portid,
1613 info->snd_seq, 0, OVS_DP_CMD_NEW);
1614 BUG_ON(err < 0);
1615
1616 ovs_net = net_generic(ovs_dp_get_net(dp), ovs_net_id);
1617 list_add_tail_rcu(&dp->list_node, &ovs_net->dps);
1618
1619 ovs_unlock();
1620
1621 ovs_notify(&dp_datapath_genl_family, reply, info);
1622 return 0;
1623
1624err_destroy_ports_array:
1625 ovs_unlock();
1626 kfree(dp->ports);
1627err_destroy_percpu:
1628 free_percpu(dp->stats_percpu);
1629err_destroy_table:
1630 ovs_flow_tbl_destroy(&dp->table);
1631err_free_dp:
1632 kfree(dp);
1633err_free_reply:
1634 kfree_skb(reply);
1635err:
1636 return err;
1637}
1638
1639/* Called with ovs_mutex. */
1640static void __dp_destroy(struct datapath *dp)
1641{
1642 int i;
1643
1644 for (i = 0; i < DP_VPORT_HASH_BUCKETS; i++) {
1645 struct vport *vport;
1646 struct hlist_node *n;
1647
1648 hlist_for_each_entry_safe(vport, n, &dp->ports[i], dp_hash_node)
1649 if (vport->port_no != OVSP_LOCAL)
1650 ovs_dp_detach_port(vport);
1651 }
1652
1653 list_del_rcu(&dp->list_node);
1654
1655 /* OVSP_LOCAL is datapath internal port. We need to make sure that
1656 * all ports in datapath are destroyed first before freeing datapath.
1657 */
1658 ovs_dp_detach_port(ovs_vport_ovsl(dp, OVSP_LOCAL));
1659
1660 /* RCU destroy the flow table */
1661 call_rcu(&dp->rcu, destroy_dp_rcu);
1662}
1663
1664static int ovs_dp_cmd_del(struct sk_buff *skb, struct genl_info *info)
1665{
1666 struct sk_buff *reply;
1667 struct datapath *dp;
1668 int err;
1669
1670 reply = ovs_dp_cmd_alloc_info();
1671 if (!reply)
1672 return -ENOMEM;
1673
1674 ovs_lock();
1675 dp = lookup_datapath(sock_net(skb->sk), info->userhdr, info->attrs);
1676 err = PTR_ERR(dp);
1677 if (IS_ERR(dp))
1678 goto err_unlock_free;
1679
1680 err = ovs_dp_cmd_fill_info(dp, reply, info->snd_portid,
1681 info->snd_seq, 0, OVS_DP_CMD_DEL);
1682 BUG_ON(err < 0);
1683
1684 __dp_destroy(dp);
1685 ovs_unlock();
1686
1687 ovs_notify(&dp_datapath_genl_family, reply, info);
1688
1689 return 0;
1690
1691err_unlock_free:
1692 ovs_unlock();
1693 kfree_skb(reply);
1694 return err;
1695}
1696
1697static int ovs_dp_cmd_set(struct sk_buff *skb, struct genl_info *info)
1698{
1699 struct sk_buff *reply;
1700 struct datapath *dp;
1701 int err;
1702
1703 reply = ovs_dp_cmd_alloc_info();
1704 if (!reply)
1705 return -ENOMEM;
1706
1707 ovs_lock();
1708 dp = lookup_datapath(sock_net(skb->sk), info->userhdr, info->attrs);
1709 err = PTR_ERR(dp);
1710 if (IS_ERR(dp))
1711 goto err_unlock_free;
1712
1713 ovs_dp_change(dp, info->attrs);
1714
1715 err = ovs_dp_cmd_fill_info(dp, reply, info->snd_portid,
1716 info->snd_seq, 0, OVS_DP_CMD_NEW);
1717 BUG_ON(err < 0);
1718
1719 ovs_unlock();
1720 ovs_notify(&dp_datapath_genl_family, reply, info);
1721
1722 return 0;
1723
1724err_unlock_free:
1725 ovs_unlock();
1726 kfree_skb(reply);
1727 return err;
1728}
1729
1730static int ovs_dp_cmd_get(struct sk_buff *skb, struct genl_info *info)
1731{
1732 struct sk_buff *reply;
1733 struct datapath *dp;
1734 int err;
1735
1736 reply = ovs_dp_cmd_alloc_info();
1737 if (!reply)
1738 return -ENOMEM;
1739
1740 ovs_lock();
1741 dp = lookup_datapath(sock_net(skb->sk), info->userhdr, info->attrs);
1742 if (IS_ERR(dp)) {
1743 err = PTR_ERR(dp);
1744 goto err_unlock_free;
1745 }
1746 err = ovs_dp_cmd_fill_info(dp, reply, info->snd_portid,
1747 info->snd_seq, 0, OVS_DP_CMD_NEW);
1748 BUG_ON(err < 0);
1749 ovs_unlock();
1750
1751 return genlmsg_reply(reply, info);
1752
1753err_unlock_free:
1754 ovs_unlock();
1755 kfree_skb(reply);
1756 return err;
1757}
1758
1759static int ovs_dp_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb)
1760{
1761 struct ovs_net *ovs_net = net_generic(sock_net(skb->sk), ovs_net_id);
1762 struct datapath *dp;
1763 int skip = cb->args[0];
1764 int i = 0;
1765
1766 ovs_lock();
1767 list_for_each_entry(dp, &ovs_net->dps, list_node) {
1768 if (i >= skip &&
1769 ovs_dp_cmd_fill_info(dp, skb, NETLINK_CB(cb->skb).portid,
1770 cb->nlh->nlmsg_seq, NLM_F_MULTI,
1771 OVS_DP_CMD_NEW) < 0)
1772 break;
1773 i++;
1774 }
1775 ovs_unlock();
1776
1777 cb->args[0] = i;
1778
1779 return skb->len;
1780}
1781
1782static const struct nla_policy datapath_policy[OVS_DP_ATTR_MAX + 1] = {
1783 [OVS_DP_ATTR_NAME] = { .type = NLA_NUL_STRING, .len = IFNAMSIZ - 1 },
1784 [OVS_DP_ATTR_UPCALL_PID] = { .type = NLA_U32 },
1785 [OVS_DP_ATTR_USER_FEATURES] = { .type = NLA_U32 },
1786};
1787
1788static const struct genl_ops dp_datapath_genl_ops[] = {
1789 { .cmd = OVS_DP_CMD_NEW,
1790 .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
1791 .policy = datapath_policy,
1792 .doit = ovs_dp_cmd_new
1793 },
1794 { .cmd = OVS_DP_CMD_DEL,
1795 .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
1796 .policy = datapath_policy,
1797 .doit = ovs_dp_cmd_del
1798 },
1799 { .cmd = OVS_DP_CMD_GET,
1800 .flags = 0, /* OK for unprivileged users. */
1801 .policy = datapath_policy,
1802 .doit = ovs_dp_cmd_get,
1803 .dumpit = ovs_dp_cmd_dump
1804 },
1805 { .cmd = OVS_DP_CMD_SET,
1806 .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
1807 .policy = datapath_policy,
1808 .doit = ovs_dp_cmd_set,
1809 },
1810};
1811
1812static struct genl_family dp_datapath_genl_family __ro_after_init = {
1813 .hdrsize = sizeof(struct ovs_header),
1814 .name = OVS_DATAPATH_FAMILY,
1815 .version = OVS_DATAPATH_VERSION,
1816 .maxattr = OVS_DP_ATTR_MAX,
1817 .netnsok = true,
1818 .parallel_ops = true,
1819 .ops = dp_datapath_genl_ops,
1820 .n_ops = ARRAY_SIZE(dp_datapath_genl_ops),
1821 .mcgrps = &ovs_dp_datapath_multicast_group,
1822 .n_mcgrps = 1,
1823 .module = THIS_MODULE,
1824};
1825
1826/* Called with ovs_mutex or RCU read lock. */
1827static int ovs_vport_cmd_fill_info(struct vport *vport, struct sk_buff *skb,
1828 u32 portid, u32 seq, u32 flags, u8 cmd)
1829{
1830 struct ovs_header *ovs_header;
1831 struct ovs_vport_stats vport_stats;
1832 int err;
1833
1834 ovs_header = genlmsg_put(skb, portid, seq, &dp_vport_genl_family,
1835 flags, cmd);
1836 if (!ovs_header)
1837 return -EMSGSIZE;
1838
1839 ovs_header->dp_ifindex = get_dpifindex(vport->dp);
1840
1841 if (nla_put_u32(skb, OVS_VPORT_ATTR_PORT_NO, vport->port_no) ||
1842 nla_put_u32(skb, OVS_VPORT_ATTR_TYPE, vport->ops->type) ||
1843 nla_put_string(skb, OVS_VPORT_ATTR_NAME,
1844 ovs_vport_name(vport)))
1845 goto nla_put_failure;
1846
1847 ovs_vport_get_stats(vport, &vport_stats);
1848 if (nla_put_64bit(skb, OVS_VPORT_ATTR_STATS,
1849 sizeof(struct ovs_vport_stats), &vport_stats,
1850 OVS_VPORT_ATTR_PAD))
1851 goto nla_put_failure;
1852
1853 if (ovs_vport_get_upcall_portids(vport, skb))
1854 goto nla_put_failure;
1855
1856 err = ovs_vport_get_options(vport, skb);
1857 if (err == -EMSGSIZE)
1858 goto error;
1859
1860 genlmsg_end(skb, ovs_header);
1861 return 0;
1862
1863nla_put_failure:
1864 err = -EMSGSIZE;
1865error:
1866 genlmsg_cancel(skb, ovs_header);
1867 return err;
1868}
1869
1870static struct sk_buff *ovs_vport_cmd_alloc_info(void)
1871{
1872 return nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
1873}
1874
1875/* Called with ovs_mutex, only via ovs_dp_notify_wq(). */
1876struct sk_buff *ovs_vport_cmd_build_info(struct vport *vport, u32 portid,
1877 u32 seq, u8 cmd)
1878{
1879 struct sk_buff *skb;
1880 int retval;
1881
1882 skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_ATOMIC);
1883 if (!skb)
1884 return ERR_PTR(-ENOMEM);
1885
1886 retval = ovs_vport_cmd_fill_info(vport, skb, portid, seq, 0, cmd);
1887 BUG_ON(retval < 0);
1888
1889 return skb;
1890}
1891
1892/* Called with ovs_mutex or RCU read lock. */
1893static struct vport *lookup_vport(struct net *net,
1894 const struct ovs_header *ovs_header,
1895 struct nlattr *a[OVS_VPORT_ATTR_MAX + 1])
1896{
1897 struct datapath *dp;
1898 struct vport *vport;
1899
1900 if (a[OVS_VPORT_ATTR_NAME]) {
1901 vport = ovs_vport_locate(net, nla_data(a[OVS_VPORT_ATTR_NAME]));
1902 if (!vport)
1903 return ERR_PTR(-ENODEV);
1904 if (ovs_header->dp_ifindex &&
1905 ovs_header->dp_ifindex != get_dpifindex(vport->dp))
1906 return ERR_PTR(-ENODEV);
1907 return vport;
1908 } else if (a[OVS_VPORT_ATTR_PORT_NO]) {
1909 u32 port_no = nla_get_u32(a[OVS_VPORT_ATTR_PORT_NO]);
1910
1911 if (port_no >= DP_MAX_PORTS)
1912 return ERR_PTR(-EFBIG);
1913
1914 dp = get_dp(net, ovs_header->dp_ifindex);
1915 if (!dp)
1916 return ERR_PTR(-ENODEV);
1917
1918 vport = ovs_vport_ovsl_rcu(dp, port_no);
1919 if (!vport)
1920 return ERR_PTR(-ENODEV);
1921 return vport;
1922 } else
1923 return ERR_PTR(-EINVAL);
1924}
1925
1926/* Called with ovs_mutex */
1927static void update_headroom(struct datapath *dp)
1928{
1929 unsigned dev_headroom, max_headroom = 0;
1930 struct net_device *dev;
1931 struct vport *vport;
1932 int i;
1933
1934 for (i = 0; i < DP_VPORT_HASH_BUCKETS; i++) {
1935 hlist_for_each_entry_rcu(vport, &dp->ports[i], dp_hash_node) {
1936 dev = vport->dev;
1937 dev_headroom = netdev_get_fwd_headroom(dev);
1938 if (dev_headroom > max_headroom)
1939 max_headroom = dev_headroom;
1940 }
1941 }
1942
1943 dp->max_headroom = max_headroom;
1944 for (i = 0; i < DP_VPORT_HASH_BUCKETS; i++)
1945 hlist_for_each_entry_rcu(vport, &dp->ports[i], dp_hash_node)
1946 netdev_set_rx_headroom(vport->dev, max_headroom);
1947}
1948
1949static int ovs_vport_cmd_new(struct sk_buff *skb, struct genl_info *info)
1950{
1951 struct nlattr **a = info->attrs;
1952 struct ovs_header *ovs_header = info->userhdr;
1953 struct vport_parms parms;
1954 struct sk_buff *reply;
1955 struct vport *vport;
1956 struct datapath *dp;
1957 u32 port_no;
1958 int err;
1959
1960 if (!a[OVS_VPORT_ATTR_NAME] || !a[OVS_VPORT_ATTR_TYPE] ||
1961 !a[OVS_VPORT_ATTR_UPCALL_PID])
1962 return -EINVAL;
1963
1964 port_no = a[OVS_VPORT_ATTR_PORT_NO]
1965 ? nla_get_u32(a[OVS_VPORT_ATTR_PORT_NO]) : 0;
1966 if (port_no >= DP_MAX_PORTS)
1967 return -EFBIG;
1968
1969 reply = ovs_vport_cmd_alloc_info();
1970 if (!reply)
1971 return -ENOMEM;
1972
1973 ovs_lock();
1974restart:
1975 dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
1976 err = -ENODEV;
1977 if (!dp)
1978 goto exit_unlock_free;
1979
1980 if (port_no) {
1981 vport = ovs_vport_ovsl(dp, port_no);
1982 err = -EBUSY;
1983 if (vport)
1984 goto exit_unlock_free;
1985 } else {
1986 for (port_no = 1; ; port_no++) {
1987 if (port_no >= DP_MAX_PORTS) {
1988 err = -EFBIG;
1989 goto exit_unlock_free;
1990 }
1991 vport = ovs_vport_ovsl(dp, port_no);
1992 if (!vport)
1993 break;
1994 }
1995 }
1996
1997 parms.name = nla_data(a[OVS_VPORT_ATTR_NAME]);
1998 parms.type = nla_get_u32(a[OVS_VPORT_ATTR_TYPE]);
1999 parms.options = a[OVS_VPORT_ATTR_OPTIONS];
2000 parms.dp = dp;
2001 parms.port_no = port_no;
2002 parms.upcall_portids = a[OVS_VPORT_ATTR_UPCALL_PID];
2003
2004 vport = new_vport(&parms);
2005 err = PTR_ERR(vport);
2006 if (IS_ERR(vport)) {
2007 if (err == -EAGAIN)
2008 goto restart;
2009 goto exit_unlock_free;
2010 }
2011
2012 err = ovs_vport_cmd_fill_info(vport, reply, info->snd_portid,
2013 info->snd_seq, 0, OVS_VPORT_CMD_NEW);
2014
2015 if (netdev_get_fwd_headroom(vport->dev) > dp->max_headroom)
2016 update_headroom(dp);
2017 else
2018 netdev_set_rx_headroom(vport->dev, dp->max_headroom);
2019
2020 BUG_ON(err < 0);
2021 ovs_unlock();
2022
2023 ovs_notify(&dp_vport_genl_family, reply, info);
2024 return 0;
2025
2026exit_unlock_free:
2027 ovs_unlock();
2028 kfree_skb(reply);
2029 return err;
2030}
2031
2032static int ovs_vport_cmd_set(struct sk_buff *skb, struct genl_info *info)
2033{
2034 struct nlattr **a = info->attrs;
2035 struct sk_buff *reply;
2036 struct vport *vport;
2037 int err;
2038
2039 reply = ovs_vport_cmd_alloc_info();
2040 if (!reply)
2041 return -ENOMEM;
2042
2043 ovs_lock();
2044 vport = lookup_vport(sock_net(skb->sk), info->userhdr, a);
2045 err = PTR_ERR(vport);
2046 if (IS_ERR(vport))
2047 goto exit_unlock_free;
2048
2049 if (a[OVS_VPORT_ATTR_TYPE] &&
2050 nla_get_u32(a[OVS_VPORT_ATTR_TYPE]) != vport->ops->type) {
2051 err = -EINVAL;
2052 goto exit_unlock_free;
2053 }
2054
2055 if (a[OVS_VPORT_ATTR_OPTIONS]) {
2056 err = ovs_vport_set_options(vport, a[OVS_VPORT_ATTR_OPTIONS]);
2057 if (err)
2058 goto exit_unlock_free;
2059 }
2060
2061
2062 if (a[OVS_VPORT_ATTR_UPCALL_PID]) {
2063 struct nlattr *ids = a[OVS_VPORT_ATTR_UPCALL_PID];
2064
2065 err = ovs_vport_set_upcall_portids(vport, ids);
2066 if (err)
2067 goto exit_unlock_free;
2068 }
2069
2070 err = ovs_vport_cmd_fill_info(vport, reply, info->snd_portid,
2071 info->snd_seq, 0, OVS_VPORT_CMD_NEW);
2072 BUG_ON(err < 0);
2073
2074 ovs_unlock();
2075 ovs_notify(&dp_vport_genl_family, reply, info);
2076 return 0;
2077
2078exit_unlock_free:
2079 ovs_unlock();
2080 kfree_skb(reply);
2081 return err;
2082}
2083
2084static int ovs_vport_cmd_del(struct sk_buff *skb, struct genl_info *info)
2085{
2086 bool must_update_headroom = false;
2087 struct nlattr **a = info->attrs;
2088 struct sk_buff *reply;
2089 struct datapath *dp;
2090 struct vport *vport;
2091 int err;
2092
2093 reply = ovs_vport_cmd_alloc_info();
2094 if (!reply)
2095 return -ENOMEM;
2096
2097 ovs_lock();
2098 vport = lookup_vport(sock_net(skb->sk), info->userhdr, a);
2099 err = PTR_ERR(vport);
2100 if (IS_ERR(vport))
2101 goto exit_unlock_free;
2102
2103 if (vport->port_no == OVSP_LOCAL) {
2104 err = -EINVAL;
2105 goto exit_unlock_free;
2106 }
2107
2108 err = ovs_vport_cmd_fill_info(vport, reply, info->snd_portid,
2109 info->snd_seq, 0, OVS_VPORT_CMD_DEL);
2110 BUG_ON(err < 0);
2111
2112 /* the vport deletion may trigger dp headroom update */
2113 dp = vport->dp;
2114 if (netdev_get_fwd_headroom(vport->dev) == dp->max_headroom)
2115 must_update_headroom = true;
2116 netdev_reset_rx_headroom(vport->dev);
2117 ovs_dp_detach_port(vport);
2118
2119 if (must_update_headroom)
2120 update_headroom(dp);
2121 ovs_unlock();
2122
2123 ovs_notify(&dp_vport_genl_family, reply, info);
2124 return 0;
2125
2126exit_unlock_free:
2127 ovs_unlock();
2128 kfree_skb(reply);
2129 return err;
2130}
2131
2132static int ovs_vport_cmd_get(struct sk_buff *skb, struct genl_info *info)
2133{
2134 struct nlattr **a = info->attrs;
2135 struct ovs_header *ovs_header = info->userhdr;
2136 struct sk_buff *reply;
2137 struct vport *vport;
2138 int err;
2139
2140 reply = ovs_vport_cmd_alloc_info();
2141 if (!reply)
2142 return -ENOMEM;
2143
2144 rcu_read_lock();
2145 vport = lookup_vport(sock_net(skb->sk), ovs_header, a);
2146 err = PTR_ERR(vport);
2147 if (IS_ERR(vport))
2148 goto exit_unlock_free;
2149 err = ovs_vport_cmd_fill_info(vport, reply, info->snd_portid,
2150 info->snd_seq, 0, OVS_VPORT_CMD_NEW);
2151 BUG_ON(err < 0);
2152 rcu_read_unlock();
2153
2154 return genlmsg_reply(reply, info);
2155
2156exit_unlock_free:
2157 rcu_read_unlock();
2158 kfree_skb(reply);
2159 return err;
2160}
2161
2162static int ovs_vport_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb)
2163{
2164 struct ovs_header *ovs_header = genlmsg_data(nlmsg_data(cb->nlh));
2165 struct datapath *dp;
2166 int bucket = cb->args[0], skip = cb->args[1];
2167 int i, j = 0;
2168
2169 rcu_read_lock();
2170 dp = get_dp_rcu(sock_net(skb->sk), ovs_header->dp_ifindex);
2171 if (!dp) {
2172 rcu_read_unlock();
2173 return -ENODEV;
2174 }
2175 for (i = bucket; i < DP_VPORT_HASH_BUCKETS; i++) {
2176 struct vport *vport;
2177
2178 j = 0;
2179 hlist_for_each_entry_rcu(vport, &dp->ports[i], dp_hash_node) {
2180 if (j >= skip &&
2181 ovs_vport_cmd_fill_info(vport, skb,
2182 NETLINK_CB(cb->skb).portid,
2183 cb->nlh->nlmsg_seq,
2184 NLM_F_MULTI,
2185 OVS_VPORT_CMD_NEW) < 0)
2186 goto out;
2187
2188 j++;
2189 }
2190 skip = 0;
2191 }
2192out:
2193 rcu_read_unlock();
2194
2195 cb->args[0] = i;
2196 cb->args[1] = j;
2197
2198 return skb->len;
2199}
2200
2201static const struct nla_policy vport_policy[OVS_VPORT_ATTR_MAX + 1] = {
2202 [OVS_VPORT_ATTR_NAME] = { .type = NLA_NUL_STRING, .len = IFNAMSIZ - 1 },
2203 [OVS_VPORT_ATTR_STATS] = { .len = sizeof(struct ovs_vport_stats) },
2204 [OVS_VPORT_ATTR_PORT_NO] = { .type = NLA_U32 },
2205 [OVS_VPORT_ATTR_TYPE] = { .type = NLA_U32 },
2206 [OVS_VPORT_ATTR_UPCALL_PID] = { .type = NLA_U32 },
2207 [OVS_VPORT_ATTR_OPTIONS] = { .type = NLA_NESTED },
2208};
2209
2210static const struct genl_ops dp_vport_genl_ops[] = {
2211 { .cmd = OVS_VPORT_CMD_NEW,
2212 .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
2213 .policy = vport_policy,
2214 .doit = ovs_vport_cmd_new
2215 },
2216 { .cmd = OVS_VPORT_CMD_DEL,
2217 .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
2218 .policy = vport_policy,
2219 .doit = ovs_vport_cmd_del
2220 },
2221 { .cmd = OVS_VPORT_CMD_GET,
2222 .flags = 0, /* OK for unprivileged users. */
2223 .policy = vport_policy,
2224 .doit = ovs_vport_cmd_get,
2225 .dumpit = ovs_vport_cmd_dump
2226 },
2227 { .cmd = OVS_VPORT_CMD_SET,
2228 .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
2229 .policy = vport_policy,
2230 .doit = ovs_vport_cmd_set,
2231 },
2232};
2233
2234struct genl_family dp_vport_genl_family __ro_after_init = {
2235 .hdrsize = sizeof(struct ovs_header),
2236 .name = OVS_VPORT_FAMILY,
2237 .version = OVS_VPORT_VERSION,
2238 .maxattr = OVS_VPORT_ATTR_MAX,
2239 .netnsok = true,
2240 .parallel_ops = true,
2241 .ops = dp_vport_genl_ops,
2242 .n_ops = ARRAY_SIZE(dp_vport_genl_ops),
2243 .mcgrps = &ovs_dp_vport_multicast_group,
2244 .n_mcgrps = 1,
2245 .module = THIS_MODULE,
2246};
2247
2248static struct genl_family * const dp_genl_families[] = {
2249 &dp_datapath_genl_family,
2250 &dp_vport_genl_family,
2251 &dp_flow_genl_family,
2252 &dp_packet_genl_family,
2253};
2254
2255static void dp_unregister_genl(int n_families)
2256{
2257 int i;
2258
2259 for (i = 0; i < n_families; i++)
2260 genl_unregister_family(dp_genl_families[i]);
2261}
2262
2263static int __init dp_register_genl(void)
2264{
2265 int err;
2266 int i;
2267
2268 for (i = 0; i < ARRAY_SIZE(dp_genl_families); i++) {
2269
2270 err = genl_register_family(dp_genl_families[i]);
2271 if (err)
2272 goto error;
2273 }
2274
2275 return 0;
2276
2277error:
2278 dp_unregister_genl(i);
2279 return err;
2280}
2281
2282static int __net_init ovs_init_net(struct net *net)
2283{
2284 struct ovs_net *ovs_net = net_generic(net, ovs_net_id);
2285
2286 INIT_LIST_HEAD(&ovs_net->dps);
2287 INIT_WORK(&ovs_net->dp_notify_work, ovs_dp_notify_wq);
2288 ovs_ct_init(net);
2289 return 0;
2290}
2291
2292static void __net_exit list_vports_from_net(struct net *net, struct net *dnet,
2293 struct list_head *head)
2294{
2295 struct ovs_net *ovs_net = net_generic(net, ovs_net_id);
2296 struct datapath *dp;
2297
2298 list_for_each_entry(dp, &ovs_net->dps, list_node) {
2299 int i;
2300
2301 for (i = 0; i < DP_VPORT_HASH_BUCKETS; i++) {
2302 struct vport *vport;
2303
2304 hlist_for_each_entry(vport, &dp->ports[i], dp_hash_node) {
2305 if (vport->ops->type != OVS_VPORT_TYPE_INTERNAL)
2306 continue;
2307
2308 if (dev_net(vport->dev) == dnet)
2309 list_add(&vport->detach_list, head);
2310 }
2311 }
2312 }
2313}
2314
2315static void __net_exit ovs_exit_net(struct net *dnet)
2316{
2317 struct datapath *dp, *dp_next;
2318 struct ovs_net *ovs_net = net_generic(dnet, ovs_net_id);
2319 struct vport *vport, *vport_next;
2320 struct net *net;
2321 LIST_HEAD(head);
2322
2323 ovs_ct_exit(dnet);
2324 ovs_lock();
2325 list_for_each_entry_safe(dp, dp_next, &ovs_net->dps, list_node)
2326 __dp_destroy(dp);
2327
2328 rtnl_lock();
2329 for_each_net(net)
2330 list_vports_from_net(net, dnet, &head);
2331 rtnl_unlock();
2332
2333 /* Detach all vports from given namespace. */
2334 list_for_each_entry_safe(vport, vport_next, &head, detach_list) {
2335 list_del(&vport->detach_list);
2336 ovs_dp_detach_port(vport);
2337 }
2338
2339 ovs_unlock();
2340
2341 cancel_work_sync(&ovs_net->dp_notify_work);
2342}
2343
/* Per-network-namespace hooks.  .id and .size describe the struct ovs_net
 * storage that the rest of this file looks up with
 * net_generic(net, ovs_net_id).
 */
2344static struct pernet_operations ovs_net_ops = {
2345 .init = ovs_init_net,
2346 .exit = ovs_exit_net,
2347 .id = &ovs_net_id,
2348 .size = sizeof(struct ovs_net),
2349};
2350
/* Module entry point: bring up each datapath subsystem in turn.
 *
 * When a step fails, control jumps to the label that undoes the steps
 * already completed, in reverse order, and the failing step's error code
 * is returned.  Returns 0 on success.
 */
2351static int __init dp_init(void)
2352{
2353 int err;
2354
/* Compile-time check: struct ovs_skb_cb must fit in sk_buff's cb[] area. */
2355 BUILD_BUG_ON(sizeof(struct ovs_skb_cb) > FIELD_SIZEOF(struct sk_buff, cb));
2356
2357 pr_info("Open vSwitch switching datapath\n");
2358
2359 err = action_fifos_init();
2360 if (err)
2361 goto error;
2362
2363 err = ovs_internal_dev_rtnl_link_register();
2364 if (err)
2365 goto error_action_fifos_exit;
2366
2367 err = ovs_flow_init();
2368 if (err)
2369 goto error_unreg_rtnl_link;
2370
2371 err = ovs_vport_init();
2372 if (err)
2373 goto error_flow_exit;
2374
2375 err = register_pernet_device(&ovs_net_ops);
2376 if (err)
2377 goto error_vport_exit;
2378
2379 err = register_netdevice_notifier(&ovs_dp_device_notifier);
2380 if (err)
2381 goto error_netns_exit;
2382
2383 err = ovs_netdev_init();
2384 if (err)
2385 goto error_unreg_notifier;
2386
2387 err = dp_register_genl();
2388 if (err < 0)
2389 goto error_unreg_netdev;
2390
2391 return 0;
2392
/* Unwind labels: each one undoes a single step and falls through to the next. */
2393error_unreg_netdev:
2394 ovs_netdev_exit();
2395error_unreg_notifier:
2396 unregister_netdevice_notifier(&ovs_dp_device_notifier);
2397error_netns_exit:
2398 unregister_pernet_device(&ovs_net_ops);
2399error_vport_exit:
2400 ovs_vport_exit();
2401error_flow_exit:
2402 ovs_flow_exit();
2403error_unreg_rtnl_link:
2404 ovs_internal_dev_rtnl_link_unregister();
2405error_action_fifos_exit:
2406 action_fifos_exit();
2407error:
2408 return err;
2409}
2410
/* Module exit point: tear down what dp_init() set up, in reverse order. */
2411static void dp_cleanup(void)
2412{
2413 dp_unregister_genl(ARRAY_SIZE(dp_genl_families));
2414 ovs_netdev_exit();
2415 unregister_netdevice_notifier(&ovs_dp_device_notifier);
2416 unregister_pernet_device(&ovs_net_ops);
/* Wait for outstanding RCU callbacks before the remaining subsystems exit. */
2417 rcu_barrier();
2418 ovs_vport_exit();
2419 ovs_flow_exit();
2420 ovs_internal_dev_rtnl_link_unregister();
2421 action_fifos_exit();
2422}
2423
/* Module entry/exit hooks, description, license, and one Generic Netlink
 * family alias per OVS family name.
 */
2424module_init(dp_init);
2425module_exit(dp_cleanup);
2426
2427MODULE_DESCRIPTION("Open vSwitch switching datapath");
2428MODULE_LICENSE("GPL");
2429MODULE_ALIAS_GENL_FAMILY(OVS_DATAPATH_FAMILY);
2430MODULE_ALIAS_GENL_FAMILY(OVS_VPORT_FAMILY);
2431MODULE_ALIAS_GENL_FAMILY(OVS_FLOW_FAMILY);
2432MODULE_ALIAS_GENL_FAMILY(OVS_PACKET_FAMILY);
1/*
2 * Copyright (c) 2007-2014 Nicira, Inc.
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of version 2 of the GNU General Public
6 * License as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope that it will be useful, but
9 * WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * General Public License for more details.
12 *
13 * You should have received a copy of the GNU General Public License
14 * along with this program; if not, write to the Free Software
15 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
16 * 02110-1301, USA
17 */
18
19#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
20
21#include <linux/init.h>
22#include <linux/module.h>
23#include <linux/if_arp.h>
24#include <linux/if_vlan.h>
25#include <linux/in.h>
26#include <linux/ip.h>
27#include <linux/jhash.h>
28#include <linux/delay.h>
29#include <linux/time.h>
30#include <linux/etherdevice.h>
31#include <linux/genetlink.h>
32#include <linux/kernel.h>
33#include <linux/kthread.h>
34#include <linux/mutex.h>
35#include <linux/percpu.h>
36#include <linux/rcupdate.h>
37#include <linux/tcp.h>
38#include <linux/udp.h>
39#include <linux/ethtool.h>
40#include <linux/wait.h>
41#include <asm/div64.h>
42#include <linux/highmem.h>
43#include <linux/netfilter_bridge.h>
44#include <linux/netfilter_ipv4.h>
45#include <linux/inetdevice.h>
46#include <linux/list.h>
47#include <linux/openvswitch.h>
48#include <linux/rculist.h>
49#include <linux/dmi.h>
50#include <net/genetlink.h>
51#include <net/net_namespace.h>
52#include <net/netns/generic.h>
53
54#include "datapath.h"
55#include "flow.h"
56#include "flow_table.h"
57#include "flow_netlink.h"
58#include "vport-internal_dev.h"
59#include "vport-netdev.h"
60
/* Identifier passed to net_generic() to find this module's per-namespace
 * struct ovs_net.
 */
61int ovs_net_id __read_mostly;
62EXPORT_SYMBOL_GPL(ovs_net_id);
63
/* Forward declarations: these families are defined further down the file. */
64static struct genl_family dp_packet_genl_family;
65static struct genl_family dp_flow_genl_family;
66static struct genl_family dp_datapath_genl_family;
67
68static const struct nla_policy flow_policy[];
69
/* Multicast groups, one each for flow, datapath and vport messages. */
70static const struct genl_multicast_group ovs_dp_flow_multicast_group = {
71 .name = OVS_FLOW_MCGROUP,
72};
73
74static const struct genl_multicast_group ovs_dp_datapath_multicast_group = {
75 .name = OVS_DATAPATH_MCGROUP,
76};
77
78static const struct genl_multicast_group ovs_dp_vport_multicast_group = {
79 .name = OVS_VPORT_MCGROUP,
80};
81
82/* Check if need to build a reply message.
83 * OVS userspace sets the NLM_F_ECHO flag if it needs the reply. */
84static bool ovs_must_notify(struct genl_family *family, struct genl_info *info,
85 unsigned int group)
86{
87 return info->nlhdr->nlmsg_flags & NLM_F_ECHO ||
88 genl_has_listeners(family, genl_info_net(info), group);
89}
90
/* Hand @skb to genl_notify() for @family, using group index 0 and
 * GFP_KERNEL, with @info describing the originating request.
 */
91static void ovs_notify(struct genl_family *family,
92 struct sk_buff *skb, struct genl_info *info)
93{
94 genl_notify(family, skb, info, 0, GFP_KERNEL);
95}
96
97/**
98 * DOC: Locking:
99 *
100 * All writes, e.g. writes to device state (add/remove datapath, port, set
101 * operations on vports, etc.) and writes to other state (flow table
102 * modifications, set miscellaneous datapath parameters, etc.), are protected
103 * by ovs_lock.
104 *
105 * Reads are protected by RCU.
106 *
107 * There are a few special cases (mostly stats) that have their own
108 * synchronization but they nest under all of above and don't interact with
109 * each other.
110 *
111 * The RTNL lock nests inside ovs_mutex.
112 */
113
/* The mutex behind ovs_lock()/ovs_unlock(). */
114static DEFINE_MUTEX(ovs_mutex);
115
/* Acquire ovs_mutex; may sleep. */
116void ovs_lock(void)
117{
118 mutex_lock(&ovs_mutex);
119}
120
/* Release ovs_mutex taken by ovs_lock(). */
121void ovs_unlock(void)
122{
123 mutex_unlock(&ovs_mutex);
124}
125
#ifdef CONFIG_LOCKDEP
/* Report whether ovs_mutex is held, for use in lockdep assertions.
 * When debug_locks is off the answer is always 1.
 */
int lockdep_ovsl_is_held(void)
{
	if (!debug_locks)
		return 1;

	return lockdep_is_held(&ovs_mutex);
}
EXPORT_SYMBOL_GPL(lockdep_ovsl_is_held);
#endif
136
/* Forward declarations for helpers defined later in this file. */
137static struct vport *new_vport(const struct vport_parms *);
138static int queue_gso_packets(struct datapath *dp, struct sk_buff *,
139 const struct sw_flow_key *,
140 const struct dp_upcall_info *);
141static int queue_userspace_packet(struct datapath *dp, struct sk_buff *,
142 const struct sw_flow_key *,
143 const struct dp_upcall_info *);
144
145/* Must be called with rcu_read_lock. */
146static struct datapath *get_dp_rcu(struct net *net, int dp_ifindex)
147{
148 struct net_device *dev = dev_get_by_index_rcu(net, dp_ifindex);
149
150 if (dev) {
151 struct vport *vport = ovs_internal_dev_get_vport(dev);
152 if (vport)
153 return vport->dp;
154 }
155
156 return NULL;
157}
158
/* Wrapper around get_dp_rcu() that takes the RCU read lock itself for the
 * duration of the lookup.
 *
 * The caller must hold either ovs_mutex or rcu_read_lock to keep the
 * returned dp pointer valid; a one-time warning is issued otherwise.
 */
static inline struct datapath *get_dp(struct net *net, int dp_ifindex)
{
	struct datapath *found;

	WARN_ON_ONCE(!rcu_read_lock_held() && !lockdep_ovsl_is_held());

	rcu_read_lock();
	found = get_dp_rcu(net, dp_ifindex);
	rcu_read_unlock();

	return found;
}
173
174/* Must be called with rcu_read_lock or ovs_mutex. */
175const char *ovs_dp_name(const struct datapath *dp)
176{
177 struct vport *vport = ovs_vport_ovsl_rcu(dp, OVSP_LOCAL);
178 return ovs_vport_name(vport);
179}
180
181static int get_dpifindex(const struct datapath *dp)
182{
183 struct vport *local;
184 int ifindex;
185
186 rcu_read_lock();
187
188 local = ovs_vport_rcu(dp, OVSP_LOCAL);
189 if (local)
190 ifindex = local->dev->ifindex;
191 else
192 ifindex = 0;
193
194 rcu_read_unlock();
195
196 return ifindex;
197}
198
199static void destroy_dp_rcu(struct rcu_head *rcu)
200{
201 struct datapath *dp = container_of(rcu, struct datapath, rcu);
202
203 ovs_flow_tbl_destroy(&dp->table);
204 free_percpu(dp->stats_percpu);
205 kfree(dp->ports);
206 kfree(dp);
207}
208
209static struct hlist_head *vport_hash_bucket(const struct datapath *dp,
210 u16 port_no)
211{
212 return &dp->ports[port_no & (DP_VPORT_HASH_BUCKETS - 1)];
213}
214
215/* Called with ovs_mutex or RCU read lock. */
216struct vport *ovs_lookup_vport(const struct datapath *dp, u16 port_no)
217{
218 struct vport *vport;
219 struct hlist_head *head;
220
221 head = vport_hash_bucket(dp, port_no);
222 hlist_for_each_entry_rcu(vport, head, dp_hash_node) {
223 if (vport->port_no == port_no)
224 return vport;
225 }
226 return NULL;
227}
228
229/* Called with ovs_mutex. */
230static struct vport *new_vport(const struct vport_parms *parms)
231{
232 struct vport *vport;
233
234 vport = ovs_vport_add(parms);
235 if (!IS_ERR(vport)) {
236 struct datapath *dp = parms->dp;
237 struct hlist_head *head = vport_hash_bucket(dp, vport->port_no);
238
239 hlist_add_head_rcu(&vport->dp_hash_node, head);
240 }
241 return vport;
242}
243
/* Remove vport @p from its datapath's port hash and delete it.
 * ASSERT_OVSL() checks the locking precondition (presumably that ovs_mutex
 * is held; see its definition).
 */
244void ovs_dp_detach_port(struct vport *p)
245{
246 ASSERT_OVSL();
247
248 /* First drop references to device. */
249 hlist_del_rcu(&p->dp_hash_node);
250
251 /* Then destroy it. */
252 ovs_vport_del(p);
253}
254
255/* Must be called with rcu_read_lock. */
256void ovs_dp_process_packet(struct sk_buff *skb, struct sw_flow_key *key)
257{
258 const struct vport *p = OVS_CB(skb)->input_vport;
259 struct datapath *dp = p->dp;
260 struct sw_flow *flow;
261 struct sw_flow_actions *sf_acts;
262 struct dp_stats_percpu *stats;
263 u64 *stats_counter;
264 u32 n_mask_hit;
265
266 stats = this_cpu_ptr(dp->stats_percpu);
267
268 /* Look up flow. */
269 flow = ovs_flow_tbl_lookup_stats(&dp->table, key, &n_mask_hit);
270 if (unlikely(!flow)) {
271 struct dp_upcall_info upcall;
272 int error;
273
274 memset(&upcall, 0, sizeof(upcall));
275 upcall.cmd = OVS_PACKET_CMD_MISS;
276 upcall.portid = ovs_vport_find_upcall_portid(p, skb);
277 upcall.mru = OVS_CB(skb)->mru;
278 error = ovs_dp_upcall(dp, skb, key, &upcall);
279 if (unlikely(error))
280 kfree_skb(skb);
281 else
282 consume_skb(skb);
283 stats_counter = &stats->n_missed;
284 goto out;
285 }
286
287 ovs_flow_stats_update(flow, key->tp.flags, skb);
288 sf_acts = rcu_dereference(flow->sf_acts);
289 ovs_execute_actions(dp, skb, sf_acts, key);
290
291 stats_counter = &stats->n_hit;
292
293out:
294 /* Update datapath statistics. */
295 u64_stats_update_begin(&stats->syncp);
296 (*stats_counter)++;
297 stats->n_mask_hit += n_mask_hit;
298 u64_stats_update_end(&stats->syncp);
299}
300
301int ovs_dp_upcall(struct datapath *dp, struct sk_buff *skb,
302 const struct sw_flow_key *key,
303 const struct dp_upcall_info *upcall_info)
304{
305 struct dp_stats_percpu *stats;
306 int err;
307
308 if (upcall_info->portid == 0) {
309 err = -ENOTCONN;
310 goto err;
311 }
312
313 if (!skb_is_gso(skb))
314 err = queue_userspace_packet(dp, skb, key, upcall_info);
315 else
316 err = queue_gso_packets(dp, skb, key, upcall_info);
317 if (err)
318 goto err;
319
320 return 0;
321
322err:
323 stats = this_cpu_ptr(dp->stats_percpu);
324
325 u64_stats_update_begin(&stats->syncp);
326 stats->n_lost++;
327 u64_stats_update_end(&stats->syncp);
328
329 return err;
330}
331
332static int queue_gso_packets(struct datapath *dp, struct sk_buff *skb,
333 const struct sw_flow_key *key,
334 const struct dp_upcall_info *upcall_info)
335{
336 unsigned short gso_type = skb_shinfo(skb)->gso_type;
337 struct sw_flow_key later_key;
338 struct sk_buff *segs, *nskb;
339 int err;
340
341 BUILD_BUG_ON(sizeof(*OVS_CB(skb)) > SKB_SGO_CB_OFFSET);
342 segs = __skb_gso_segment(skb, NETIF_F_SG, false);
343 if (IS_ERR(segs))
344 return PTR_ERR(segs);
345 if (segs == NULL)
346 return -EINVAL;
347
348 if (gso_type & SKB_GSO_UDP) {
349 /* The initial flow key extracted by ovs_flow_key_extract()
350 * in this case is for a first fragment, so we need to
351 * properly mark later fragments.
352 */
353 later_key = *key;
354 later_key.ip.frag = OVS_FRAG_TYPE_LATER;
355 }
356
357 /* Queue all of the segments. */
358 skb = segs;
359 do {
360 if (gso_type & SKB_GSO_UDP && skb != segs)
361 key = &later_key;
362
363 err = queue_userspace_packet(dp, skb, key, upcall_info);
364 if (err)
365 break;
366
367 } while ((skb = skb->next));
368
369 /* Free all of the segments. */
370 skb = segs;
371 do {
372 nskb = skb->next;
373 if (err)
374 kfree_skb(skb);
375 else
376 consume_skb(skb);
377 } while ((skb = nskb));
378 return err;
379}
380
381static size_t upcall_msg_size(const struct dp_upcall_info *upcall_info,
382 unsigned int hdrlen)
383{
384 size_t size = NLMSG_ALIGN(sizeof(struct ovs_header))
385 + nla_total_size(hdrlen) /* OVS_PACKET_ATTR_PACKET */
386 + nla_total_size(ovs_key_attr_size()); /* OVS_PACKET_ATTR_KEY */
387
388 /* OVS_PACKET_ATTR_USERDATA */
389 if (upcall_info->userdata)
390 size += NLA_ALIGN(upcall_info->userdata->nla_len);
391
392 /* OVS_PACKET_ATTR_EGRESS_TUN_KEY */
393 if (upcall_info->egress_tun_info)
394 size += nla_total_size(ovs_tun_key_attr_size());
395
396 /* OVS_PACKET_ATTR_ACTIONS */
397 if (upcall_info->actions_len)
398 size += nla_total_size(upcall_info->actions_len);
399
400 /* OVS_PACKET_ATTR_MRU */
401 if (upcall_info->mru)
402 size += nla_total_size(sizeof(upcall_info->mru));
403
404 return size;
405}
406
407static void pad_packet(struct datapath *dp, struct sk_buff *skb)
408{
409 if (!(dp->user_features & OVS_DP_F_UNALIGNED)) {
410 size_t plen = NLA_ALIGN(skb->len) - skb->len;
411
412 if (plen > 0)
413 memset(skb_put(skb, plen), 0, plen);
414 }
415}
416
417static int queue_userspace_packet(struct datapath *dp, struct sk_buff *skb,
418 const struct sw_flow_key *key,
419 const struct dp_upcall_info *upcall_info)
420{
421 struct ovs_header *upcall;
422 struct sk_buff *nskb = NULL;
423 struct sk_buff *user_skb = NULL; /* to be queued to userspace */
424 struct nlattr *nla;
425 size_t len;
426 unsigned int hlen;
427 int err, dp_ifindex;
428
429 dp_ifindex = get_dpifindex(dp);
430 if (!dp_ifindex)
431 return -ENODEV;
432
433 if (skb_vlan_tag_present(skb)) {
434 nskb = skb_clone(skb, GFP_ATOMIC);
435 if (!nskb)
436 return -ENOMEM;
437
438 nskb = __vlan_hwaccel_push_inside(nskb);
439 if (!nskb)
440 return -ENOMEM;
441
442 skb = nskb;
443 }
444
445 if (nla_attr_size(skb->len) > USHRT_MAX) {
446 err = -EFBIG;
447 goto out;
448 }
449
450 /* Complete checksum if needed */
451 if (skb->ip_summed == CHECKSUM_PARTIAL &&
452 (err = skb_checksum_help(skb)))
453 goto out;
454
455 /* Older versions of OVS user space enforce alignment of the last
456 * Netlink attribute to NLA_ALIGNTO which would require extensive
457 * padding logic. Only perform zerocopy if padding is not required.
458 */
459 if (dp->user_features & OVS_DP_F_UNALIGNED)
460 hlen = skb_zerocopy_headlen(skb);
461 else
462 hlen = skb->len;
463
464 len = upcall_msg_size(upcall_info, hlen);
465 user_skb = genlmsg_new(len, GFP_ATOMIC);
466 if (!user_skb) {
467 err = -ENOMEM;
468 goto out;
469 }
470
471 upcall = genlmsg_put(user_skb, 0, 0, &dp_packet_genl_family,
472 0, upcall_info->cmd);
473 upcall->dp_ifindex = dp_ifindex;
474
475 err = ovs_nla_put_key(key, key, OVS_PACKET_ATTR_KEY, false, user_skb);
476 BUG_ON(err);
477
478 if (upcall_info->userdata)
479 __nla_put(user_skb, OVS_PACKET_ATTR_USERDATA,
480 nla_len(upcall_info->userdata),
481 nla_data(upcall_info->userdata));
482
483 if (upcall_info->egress_tun_info) {
484 nla = nla_nest_start(user_skb, OVS_PACKET_ATTR_EGRESS_TUN_KEY);
485 err = ovs_nla_put_tunnel_info(user_skb,
486 upcall_info->egress_tun_info);
487 BUG_ON(err);
488 nla_nest_end(user_skb, nla);
489 }
490
491 if (upcall_info->actions_len) {
492 nla = nla_nest_start(user_skb, OVS_PACKET_ATTR_ACTIONS);
493 err = ovs_nla_put_actions(upcall_info->actions,
494 upcall_info->actions_len,
495 user_skb);
496 if (!err)
497 nla_nest_end(user_skb, nla);
498 else
499 nla_nest_cancel(user_skb, nla);
500 }
501
502 /* Add OVS_PACKET_ATTR_MRU */
503 if (upcall_info->mru) {
504 if (nla_put_u16(user_skb, OVS_PACKET_ATTR_MRU,
505 upcall_info->mru)) {
506 err = -ENOBUFS;
507 goto out;
508 }
509 pad_packet(dp, user_skb);
510 }
511
512 /* Only reserve room for attribute header, packet data is added
513 * in skb_zerocopy() */
514 if (!(nla = nla_reserve(user_skb, OVS_PACKET_ATTR_PACKET, 0))) {
515 err = -ENOBUFS;
516 goto out;
517 }
518 nla->nla_len = nla_attr_size(skb->len);
519
520 err = skb_zerocopy(user_skb, skb, skb->len, hlen);
521 if (err)
522 goto out;
523
524 /* Pad OVS_PACKET_ATTR_PACKET if linear copy was performed */
525 pad_packet(dp, user_skb);
526
527 ((struct nlmsghdr *) user_skb->data)->nlmsg_len = user_skb->len;
528
529 err = genlmsg_unicast(ovs_dp_get_net(dp), user_skb, upcall_info->portid);
530 user_skb = NULL;
531out:
532 if (err)
533 skb_tx_error(skb);
534 kfree_skb(user_skb);
535 kfree_skb(nskb);
536 return err;
537}
538
/* Handler for OVS_PACKET_CMD_EXECUTE: run a userspace-supplied action list
 * on a userspace-supplied packet.
 *
 * The request must carry OVS_PACKET_ATTR_PACKET, OVS_PACKET_ATTR_KEY and
 * OVS_PACKET_ATTR_ACTIONS, otherwise -EINVAL is returned.  A new skb is
 * filled from the packet attribute, a temporary flow is built from the key
 * and actions, and the actions are executed on the datapath named by the
 * request header with the RCU read lock held and bottom halves disabled.
 *
 * Returns the result of ovs_execute_actions(), or a negative errno if setup
 * fails (-EINVAL, -ENOMEM, -ENODEV, or an error from a helper).
 */
539static int ovs_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info)
540{
541 struct ovs_header *ovs_header = info->userhdr;
542 struct net *net = sock_net(skb->sk);
543 struct nlattr **a = info->attrs;
544 struct sw_flow_actions *acts;
545 struct sk_buff *packet;
546 struct sw_flow *flow;
547 struct sw_flow_actions *sf_acts;
548 struct datapath *dp;
549 struct ethhdr *eth;
550 struct vport *input_vport;
551 u16 mru = 0;
552 int len;
553 int err;
/* Logging is suppressed when the request carries OVS_PACKET_ATTR_PROBE. */
554 bool log = !a[OVS_PACKET_ATTR_PROBE];
555
556 err = -EINVAL;
557 if (!a[OVS_PACKET_ATTR_PACKET] || !a[OVS_PACKET_ATTR_KEY] ||
558 !a[OVS_PACKET_ATTR_ACTIONS])
559 goto err;
560
/* Copy the packet payload out of the Netlink attribute into a new skb. */
561 len = nla_len(a[OVS_PACKET_ATTR_PACKET]);
562 packet = __dev_alloc_skb(NET_IP_ALIGN + len, GFP_KERNEL);
563 err = -ENOMEM;
564 if (!packet)
565 goto err;
566 skb_reserve(packet, NET_IP_ALIGN);
567
568 nla_memcpy(__skb_put(packet, len), a[OVS_PACKET_ATTR_PACKET], len);
569
570 skb_reset_mac_header(packet);
571 eth = eth_hdr(packet);
572
573 /* Normally, setting the skb 'protocol' field would be handled by a
574 * call to eth_type_trans(), but it assumes there's a sending
575 * device, which we may not have. */
576 if (eth_proto_is_802_3(eth->h_proto))
577 packet->protocol = eth->h_proto;
578 else
579 packet->protocol = htons(ETH_P_802_2);
580
581 /* Set packet's mru */
582 if (a[OVS_PACKET_ATTR_MRU]) {
583 mru = nla_get_u16(a[OVS_PACKET_ATTR_MRU]);
584 packet->ignore_df = 1;
585 }
586 OVS_CB(packet)->mru = mru;
587
588 /* Build an sw_flow for sending this packet. */
589 flow = ovs_flow_alloc();
590 err = PTR_ERR(flow);
591 if (IS_ERR(flow))
592 goto err_kfree_skb;
593
594 err = ovs_flow_key_extract_userspace(net, a[OVS_PACKET_ATTR_KEY],
595 packet, &flow->key, log);
596 if (err)
597 goto err_flow_free;
598
599 err = ovs_nla_copy_actions(net, a[OVS_PACKET_ATTR_ACTIONS],
600 &flow->key, &acts, log);
601 if (err)
602 goto err_flow_free;
603
604 rcu_assign_pointer(flow->sf_acts, acts);
605 packet->priority = flow->key.phy.priority;
606 packet->mark = flow->key.phy.skb_mark;
607
608 rcu_read_lock();
609 dp = get_dp_rcu(net, ovs_header->dp_ifindex);
610 err = -ENODEV;
611 if (!dp)
612 goto err_unlock;
613
/* Use the key's input port, falling back to the datapath's local port. */
614 input_vport = ovs_vport_rcu(dp, flow->key.phy.in_port);
615 if (!input_vport)
616 input_vport = ovs_vport_rcu(dp, OVSP_LOCAL);
617
618 if (!input_vport)
619 goto err_unlock;
620
621 packet->dev = input_vport->dev;
622 OVS_CB(packet)->input_vport = input_vport;
623 sf_acts = rcu_dereference(flow->sf_acts);
624
625 local_bh_disable();
626 err = ovs_execute_actions(dp, packet, sf_acts, &flow->key);
627 local_bh_enable();
628 rcu_read_unlock();
629
/* NOTE(review): 'packet' is not freed on this path; presumably
 * ovs_execute_actions() takes ownership of it -- confirm.
 */
630 ovs_flow_free(flow, false);
631 return err;
632
633err_unlock:
634 rcu_read_unlock();
635err_flow_free:
636 ovs_flow_free(flow, false);
637err_kfree_skb:
638 kfree_skb(packet);
639err:
640 return err;
641}
642
/* Netlink attribute policy for OVS_PACKET_* requests.  The packet attribute
 * only specifies a length of ETH_HLEN; the others specify a type.
 */
643static const struct nla_policy packet_policy[OVS_PACKET_ATTR_MAX + 1] = {
644 [OVS_PACKET_ATTR_PACKET] = { .len = ETH_HLEN },
645 [OVS_PACKET_ATTR_KEY] = { .type = NLA_NESTED },
646 [OVS_PACKET_ATTR_ACTIONS] = { .type = NLA_NESTED },
647 [OVS_PACKET_ATTR_PROBE] = { .type = NLA_FLAG },
648 [OVS_PACKET_ATTR_MRU] = { .type = NLA_U16 },
649};
650
/* Operations of the packet family: only OVS_PACKET_CMD_EXECUTE, validated
 * against packet_policy and handled by ovs_packet_cmd_execute().
 */
651static const struct genl_ops dp_packet_genl_ops[] = {
652 { .cmd = OVS_PACKET_CMD_EXECUTE,
653 .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
654 .policy = packet_policy,
655 .doit = ovs_packet_cmd_execute
656 }
657};
658
/* Generic Netlink family for packet operations; also the family used for
 * the upcall messages built in queue_userspace_packet().
 */
659static struct genl_family dp_packet_genl_family = {
660 .id = GENL_ID_GENERATE,
661 .hdrsize = sizeof(struct ovs_header),
662 .name = OVS_PACKET_FAMILY,
663 .version = OVS_PACKET_VERSION,
664 .maxattr = OVS_PACKET_ATTR_MAX,
665 .netnsok = true,
666 .parallel_ops = true,
667 .ops = dp_packet_genl_ops,
668 .n_ops = ARRAY_SIZE(dp_packet_genl_ops),
669};
670
671static void get_dp_stats(const struct datapath *dp, struct ovs_dp_stats *stats,
672 struct ovs_dp_megaflow_stats *mega_stats)
673{
674 int i;
675
676 memset(mega_stats, 0, sizeof(*mega_stats));
677
678 stats->n_flows = ovs_flow_tbl_count(&dp->table);
679 mega_stats->n_masks = ovs_flow_tbl_num_masks(&dp->table);
680
681 stats->n_hit = stats->n_missed = stats->n_lost = 0;
682
683 for_each_possible_cpu(i) {
684 const struct dp_stats_percpu *percpu_stats;
685 struct dp_stats_percpu local_stats;
686 unsigned int start;
687
688 percpu_stats = per_cpu_ptr(dp->stats_percpu, i);
689
690 do {
691 start = u64_stats_fetch_begin_irq(&percpu_stats->syncp);
692 local_stats = *percpu_stats;
693 } while (u64_stats_fetch_retry_irq(&percpu_stats->syncp, start));
694
695 stats->n_hit += local_stats.n_hit;
696 stats->n_missed += local_stats.n_missed;
697 stats->n_lost += local_stats.n_lost;
698 mega_stats->n_mask_hit += local_stats.n_mask_hit;
699 }
700}
701
702static bool should_fill_key(const struct sw_flow_id *sfid, uint32_t ufid_flags)
703{
704 return ovs_identifier_is_ufid(sfid) &&
705 !(ufid_flags & OVS_UFID_F_OMIT_KEY);
706}
707
708static bool should_fill_mask(uint32_t ufid_flags)
709{
710 return !(ufid_flags & OVS_UFID_F_OMIT_MASK);
711}
712
713static bool should_fill_actions(uint32_t ufid_flags)
714{
715 return !(ufid_flags & OVS_UFID_F_OMIT_ACTIONS);
716}
717
718static size_t ovs_flow_cmd_msg_size(const struct sw_flow_actions *acts,
719 const struct sw_flow_id *sfid,
720 uint32_t ufid_flags)
721{
722 size_t len = NLMSG_ALIGN(sizeof(struct ovs_header));
723
724 /* OVS_FLOW_ATTR_UFID */
725 if (sfid && ovs_identifier_is_ufid(sfid))
726 len += nla_total_size(sfid->ufid_len);
727
728 /* OVS_FLOW_ATTR_KEY */
729 if (!sfid || should_fill_key(sfid, ufid_flags))
730 len += nla_total_size(ovs_key_attr_size());
731
732 /* OVS_FLOW_ATTR_MASK */
733 if (should_fill_mask(ufid_flags))
734 len += nla_total_size(ovs_key_attr_size());
735
736 /* OVS_FLOW_ATTR_ACTIONS */
737 if (should_fill_actions(ufid_flags))
738 len += nla_total_size(acts->orig_len);
739
740 return len
741 + nla_total_size(sizeof(struct ovs_flow_stats)) /* OVS_FLOW_ATTR_STATS */
742 + nla_total_size(1) /* OVS_FLOW_ATTR_TCP_FLAGS */
743 + nla_total_size(8); /* OVS_FLOW_ATTR_USED */
744}
745
746/* Called with ovs_mutex or RCU read lock. */
747static int ovs_flow_cmd_fill_stats(const struct sw_flow *flow,
748 struct sk_buff *skb)
749{
750 struct ovs_flow_stats stats;
751 __be16 tcp_flags;
752 unsigned long used;
753
754 ovs_flow_stats_get(flow, &stats, &used, &tcp_flags);
755
756 if (used &&
757 nla_put_u64(skb, OVS_FLOW_ATTR_USED, ovs_flow_used_time(used)))
758 return -EMSGSIZE;
759
760 if (stats.n_packets &&
761 nla_put(skb, OVS_FLOW_ATTR_STATS, sizeof(struct ovs_flow_stats), &stats))
762 return -EMSGSIZE;
763
764 if ((u8)ntohs(tcp_flags) &&
765 nla_put_u8(skb, OVS_FLOW_ATTR_TCP_FLAGS, (u8)ntohs(tcp_flags)))
766 return -EMSGSIZE;
767
768 return 0;
769}
770
771/* Called with ovs_mutex or RCU read lock. */
772static int ovs_flow_cmd_fill_actions(const struct sw_flow *flow,
773 struct sk_buff *skb, int skb_orig_len)
774{
775 struct nlattr *start;
776 int err;
777
778 /* If OVS_FLOW_ATTR_ACTIONS doesn't fit, skip dumping the actions if
779 * this is the first flow to be dumped into 'skb'. This is unusual for
780 * Netlink but individual action lists can be longer than
781 * NLMSG_GOODSIZE and thus entirely undumpable if we didn't do this.
782 * The userspace caller can always fetch the actions separately if it
783 * really wants them. (Most userspace callers in fact don't care.)
784 *
785 * This can only fail for dump operations because the skb is always
786 * properly sized for single flows.
787 */
788 start = nla_nest_start(skb, OVS_FLOW_ATTR_ACTIONS);
789 if (start) {
790 const struct sw_flow_actions *sf_acts;
791
792 sf_acts = rcu_dereference_ovsl(flow->sf_acts);
793 err = ovs_nla_put_actions(sf_acts->actions,
794 sf_acts->actions_len, skb);
795
796 if (!err)
797 nla_nest_end(skb, start);
798 else {
799 if (skb_orig_len)
800 return err;
801
802 nla_nest_cancel(skb, start);
803 }
804 } else if (skb_orig_len) {
805 return -EMSGSIZE;
806 }
807
808 return 0;
809}
810
811/* Called with ovs_mutex or RCU read lock. */
812static int ovs_flow_cmd_fill_info(const struct sw_flow *flow, int dp_ifindex,
813 struct sk_buff *skb, u32 portid,
814 u32 seq, u32 flags, u8 cmd, u32 ufid_flags)
815{
816 const int skb_orig_len = skb->len;
817 struct ovs_header *ovs_header;
818 int err;
819
820 ovs_header = genlmsg_put(skb, portid, seq, &dp_flow_genl_family,
821 flags, cmd);
822 if (!ovs_header)
823 return -EMSGSIZE;
824
825 ovs_header->dp_ifindex = dp_ifindex;
826
827 err = ovs_nla_put_identifier(flow, skb);
828 if (err)
829 goto error;
830
831 if (should_fill_key(&flow->id, ufid_flags)) {
832 err = ovs_nla_put_masked_key(flow, skb);
833 if (err)
834 goto error;
835 }
836
837 if (should_fill_mask(ufid_flags)) {
838 err = ovs_nla_put_mask(flow, skb);
839 if (err)
840 goto error;
841 }
842
843 err = ovs_flow_cmd_fill_stats(flow, skb);
844 if (err)
845 goto error;
846
847 if (should_fill_actions(ufid_flags)) {
848 err = ovs_flow_cmd_fill_actions(flow, skb, skb_orig_len);
849 if (err)
850 goto error;
851 }
852
853 genlmsg_end(skb, ovs_header);
854 return 0;
855
856error:
857 genlmsg_cancel(skb, ovs_header);
858 return err;
859}
860
861/* May not be called with RCU read lock. */
862static struct sk_buff *ovs_flow_cmd_alloc_info(const struct sw_flow_actions *acts,
863 const struct sw_flow_id *sfid,
864 struct genl_info *info,
865 bool always,
866 uint32_t ufid_flags)
867{
868 struct sk_buff *skb;
869 size_t len;
870
871 if (!always && !ovs_must_notify(&dp_flow_genl_family, info, 0))
872 return NULL;
873
874 len = ovs_flow_cmd_msg_size(acts, sfid, ufid_flags);
875 skb = genlmsg_new(len, GFP_KERNEL);
876 if (!skb)
877 return ERR_PTR(-ENOMEM);
878
879 return skb;
880}
881
882/* Called with ovs_mutex. */
883static struct sk_buff *ovs_flow_cmd_build_info(const struct sw_flow *flow,
884 int dp_ifindex,
885 struct genl_info *info, u8 cmd,
886 bool always, u32 ufid_flags)
887{
888 struct sk_buff *skb;
889 int retval;
890
891 skb = ovs_flow_cmd_alloc_info(ovsl_dereference(flow->sf_acts),
892 &flow->id, info, always, ufid_flags);
893 if (IS_ERR_OR_NULL(skb))
894 return skb;
895
896 retval = ovs_flow_cmd_fill_info(flow, dp_ifindex, skb,
897 info->snd_portid, info->snd_seq, 0,
898 cmd, ufid_flags);
899 BUG_ON(retval < 0);
900 return skb;
901}
902
/* Handler for OVS_FLOW_CMD_NEW: install a new flow or replace the actions
 * of an existing one.
 *
 * The request must carry OVS_FLOW_ATTR_KEY and OVS_FLOW_ATTR_ACTIONS.  The
 * flow, its actions and the optional reply skb are all allocated before
 * ovs_lock() is taken.  Under the lock the flow table is searched (by UFID
 * first when one was supplied, then by key):
 *  - no match: the new flow is inserted;
 *  - match: fails with -EEXIST if NLM_F_CREATE or NLM_F_EXCL is set,
 *    otherwise the existing flow's actions are replaced, the old actions
 *    are freed after an RCU grace period, and the unused new flow is freed.
 *
 * Returns 0 on success or a negative errno (-EINVAL, -ENODEV, -EEXIST,
 * -ENOENT, or an error from a helper).
 */
903static int ovs_flow_cmd_new(struct sk_buff *skb, struct genl_info *info)
904{
905 struct net *net = sock_net(skb->sk);
906 struct nlattr **a = info->attrs;
907 struct ovs_header *ovs_header = info->userhdr;
908 struct sw_flow *flow = NULL, *new_flow;
909 struct sw_flow_mask mask;
910 struct sk_buff *reply;
911 struct datapath *dp;
912 struct sw_flow_key key;
913 struct sw_flow_actions *acts;
914 struct sw_flow_match match;
915 u32 ufid_flags = ovs_nla_get_ufid_flags(a[OVS_FLOW_ATTR_UFID_FLAGS]);
916 int error;
/* Logging is suppressed when the request carries OVS_FLOW_ATTR_PROBE. */
917 bool log = !a[OVS_FLOW_ATTR_PROBE];
918
919 /* Must have key and actions. */
920 error = -EINVAL;
921 if (!a[OVS_FLOW_ATTR_KEY]) {
922 OVS_NLERR(log, "Flow key attr not present in new flow.");
923 goto error;
924 }
925 if (!a[OVS_FLOW_ATTR_ACTIONS]) {
926 OVS_NLERR(log, "Flow actions attr not present in new flow.");
927 goto error;
928 }
929
930 /* Most of the time we need to allocate a new flow, do it before
931 * locking.
932 */
933 new_flow = ovs_flow_alloc();
934 if (IS_ERR(new_flow)) {
935 error = PTR_ERR(new_flow);
936 goto error;
937 }
938
939 /* Extract key. */
940 ovs_match_init(&match, &key, &mask);
941 error = ovs_nla_get_match(net, &match, a[OVS_FLOW_ATTR_KEY],
942 a[OVS_FLOW_ATTR_MASK], log);
943 if (error)
944 goto err_kfree_flow;
945
946 ovs_flow_mask_key(&new_flow->key, &key, true, &mask);
947
948 /* Extract flow identifier. */
949 error = ovs_nla_get_identifier(&new_flow->id, a[OVS_FLOW_ATTR_UFID],
950 &key, log);
951 if (error)
952 goto err_kfree_flow;
953
954 /* Validate actions. */
955 error = ovs_nla_copy_actions(net, a[OVS_FLOW_ATTR_ACTIONS],
956 &new_flow->key, &acts, log);
957 if (error) {
958 OVS_NLERR(log, "Flow actions may not be safe on all matching packets.");
959 goto err_kfree_flow;
960 }
961
/* reply is NULL when nobody needs a notification; that is not an error. */
962 reply = ovs_flow_cmd_alloc_info(acts, &new_flow->id, info, false,
963 ufid_flags);
964 if (IS_ERR(reply)) {
965 error = PTR_ERR(reply);
966 goto err_kfree_acts;
967 }
968
969 ovs_lock();
970 dp = get_dp(net, ovs_header->dp_ifindex);
971 if (unlikely(!dp)) {
972 error = -ENODEV;
973 goto err_unlock_ovs;
974 }
975
976 /* Check if this is a duplicate flow */
977 if (ovs_identifier_is_ufid(&new_flow->id))
978 flow = ovs_flow_tbl_lookup_ufid(&dp->table, &new_flow->id);
979 if (!flow)
980 flow = ovs_flow_tbl_lookup(&dp->table, &key);
981 if (likely(!flow)) {
982 rcu_assign_pointer(new_flow->sf_acts, acts);
983
984 /* Put flow in bucket. */
985 error = ovs_flow_tbl_insert(&dp->table, new_flow, &mask);
986 if (unlikely(error)) {
/* acts is now reachable through new_flow->sf_acts; clear the local copy so
 * the error path below does not pass it to ovs_nla_free_flow_actions() too.
 */
987 acts = NULL;
988 goto err_unlock_ovs;
989 }
990
991 if (unlikely(reply)) {
992 error = ovs_flow_cmd_fill_info(new_flow,
993 ovs_header->dp_ifindex,
994 reply, info->snd_portid,
995 info->snd_seq, 0,
996 OVS_FLOW_CMD_NEW,
997 ufid_flags);
998 BUG_ON(error < 0);
999 }
1000 ovs_unlock();
1001 } else {
1002 struct sw_flow_actions *old_acts;
1003
1004 /* Bail out if we're not allowed to modify an existing flow.
1005 * We accept NLM_F_CREATE in place of the intended NLM_F_EXCL
1006 * because Generic Netlink treats the latter as a dump
1007 * request. We also accept NLM_F_EXCL in case that bug ever
1008 * gets fixed.
1009 */
1010 if (unlikely(info->nlhdr->nlmsg_flags & (NLM_F_CREATE
1011 | NLM_F_EXCL))) {
1012 error = -EEXIST;
1013 goto err_unlock_ovs;
1014 }
1015 /* The flow identifier has to be the same for flow updates.
1016 * Look for any overlapping flow.
1017 */
1018 if (unlikely(!ovs_flow_cmp(flow, &match))) {
1019 if (ovs_identifier_is_key(&flow->id))
1020 flow = ovs_flow_tbl_lookup_exact(&dp->table,
1021 &match);
1022 else /* UFID matches but key is different */
1023 flow = NULL;
1024 if (!flow) {
1025 error = -ENOENT;
1026 goto err_unlock_ovs;
1027 }
1028 }
1029 /* Update actions. */
1030 old_acts = ovsl_dereference(flow->sf_acts);
1031 rcu_assign_pointer(flow->sf_acts, acts);
1032
1033 if (unlikely(reply)) {
1034 error = ovs_flow_cmd_fill_info(flow,
1035 ovs_header->dp_ifindex,
1036 reply, info->snd_portid,
1037 info->snd_seq, 0,
1038 OVS_FLOW_CMD_NEW,
1039 ufid_flags);
1040 BUG_ON(error < 0);
1041 }
1042 ovs_unlock();
1043
/* Concurrent RCU readers may still be using the old actions. */
1044 ovs_nla_free_flow_actions_rcu(old_acts);
1045 ovs_flow_free(new_flow, false);
1046 }
1047
1048 if (reply)
1049 ovs_notify(&dp_flow_genl_family, reply, info);
1050 return 0;
1051
/* Error unwinding: each label releases one resource and falls through. */
1052err_unlock_ovs:
1053 ovs_unlock();
1054 kfree_skb(reply);
1055err_kfree_acts:
1056 ovs_nla_free_flow_actions(acts);
1057err_kfree_flow:
1058 ovs_flow_free(new_flow, false);
1059error:
1060 return error;
1061}
1062
1063/* Factor out action copy to avoid "Wframe-larger-than=1024" warning. */
1064static struct sw_flow_actions *get_flow_actions(struct net *net,
1065 const struct nlattr *a,
1066 const struct sw_flow_key *key,
1067 const struct sw_flow_mask *mask,
1068 bool log)
1069{
1070 struct sw_flow_actions *acts;
1071 struct sw_flow_key masked_key;
1072 int error;
1073
1074 ovs_flow_mask_key(&masked_key, key, true, mask);
1075 error = ovs_nla_copy_actions(net, a, &masked_key, &acts, log);
1076 if (error) {
1077 OVS_NLERR(log,
1078 "Actions may not be safe on all matching packets");
1079 return ERR_PTR(error);
1080 }
1081
1082 return acts;
1083}
1084
1085static int ovs_flow_cmd_set(struct sk_buff *skb, struct genl_info *info)
1086{
1087 struct net *net = sock_net(skb->sk);
1088 struct nlattr **a = info->attrs;
1089 struct ovs_header *ovs_header = info->userhdr;
1090 struct sw_flow_key key;
1091 struct sw_flow *flow;
1092 struct sw_flow_mask mask;
1093 struct sk_buff *reply = NULL;
1094 struct datapath *dp;
1095 struct sw_flow_actions *old_acts = NULL, *acts = NULL;
1096 struct sw_flow_match match;
1097 struct sw_flow_id sfid;
1098 u32 ufid_flags = ovs_nla_get_ufid_flags(a[OVS_FLOW_ATTR_UFID_FLAGS]);
1099 int error = 0;
1100 bool log = !a[OVS_FLOW_ATTR_PROBE];
1101 bool ufid_present;
1102
1103 ufid_present = ovs_nla_get_ufid(&sfid, a[OVS_FLOW_ATTR_UFID], log);
1104 if (a[OVS_FLOW_ATTR_KEY]) {
1105 ovs_match_init(&match, &key, &mask);
1106 error = ovs_nla_get_match(net, &match, a[OVS_FLOW_ATTR_KEY],
1107 a[OVS_FLOW_ATTR_MASK], log);
1108 } else if (!ufid_present) {
1109 OVS_NLERR(log,
1110 "Flow set message rejected, Key attribute missing.");
1111 error = -EINVAL;
1112 }
1113 if (error)
1114 goto error;
1115
1116 /* Validate actions. */
1117 if (a[OVS_FLOW_ATTR_ACTIONS]) {
1118 if (!a[OVS_FLOW_ATTR_KEY]) {
1119 OVS_NLERR(log,
1120 "Flow key attribute not present in set flow.");
1121 error = -EINVAL;
1122 goto error;
1123 }
1124
1125 acts = get_flow_actions(net, a[OVS_FLOW_ATTR_ACTIONS], &key,
1126 &mask, log);
1127 if (IS_ERR(acts)) {
1128 error = PTR_ERR(acts);
1129 goto error;
1130 }
1131
1132 /* Can allocate before locking if have acts. */
1133 reply = ovs_flow_cmd_alloc_info(acts, &sfid, info, false,
1134 ufid_flags);
1135 if (IS_ERR(reply)) {
1136 error = PTR_ERR(reply);
1137 goto err_kfree_acts;
1138 }
1139 }
1140
1141 ovs_lock();
1142 dp = get_dp(net, ovs_header->dp_ifindex);
1143 if (unlikely(!dp)) {
1144 error = -ENODEV;
1145 goto err_unlock_ovs;
1146 }
1147 /* Check that the flow exists. */
1148 if (ufid_present)
1149 flow = ovs_flow_tbl_lookup_ufid(&dp->table, &sfid);
1150 else
1151 flow = ovs_flow_tbl_lookup_exact(&dp->table, &match);
1152 if (unlikely(!flow)) {
1153 error = -ENOENT;
1154 goto err_unlock_ovs;
1155 }
1156
1157 /* Update actions, if present. */
1158 if (likely(acts)) {
1159 old_acts = ovsl_dereference(flow->sf_acts);
1160 rcu_assign_pointer(flow->sf_acts, acts);
1161
1162 if (unlikely(reply)) {
1163 error = ovs_flow_cmd_fill_info(flow,
1164 ovs_header->dp_ifindex,
1165 reply, info->snd_portid,
1166 info->snd_seq, 0,
1167 OVS_FLOW_CMD_NEW,
1168 ufid_flags);
1169 BUG_ON(error < 0);
1170 }
1171 } else {
1172 /* Could not alloc without acts before locking. */
1173 reply = ovs_flow_cmd_build_info(flow, ovs_header->dp_ifindex,
1174 info, OVS_FLOW_CMD_NEW, false,
1175 ufid_flags);
1176
1177 if (IS_ERR(reply)) {
1178 error = PTR_ERR(reply);
1179 goto err_unlock_ovs;
1180 }
1181 }
1182
1183 /* Clear stats. */
1184 if (a[OVS_FLOW_ATTR_CLEAR])
1185 ovs_flow_stats_clear(flow);
1186 ovs_unlock();
1187
1188 if (reply)
1189 ovs_notify(&dp_flow_genl_family, reply, info);
1190 if (old_acts)
1191 ovs_nla_free_flow_actions_rcu(old_acts);
1192
1193 return 0;
1194
1195err_unlock_ovs:
1196 ovs_unlock();
1197 kfree_skb(reply);
1198err_kfree_acts:
1199 ovs_nla_free_flow_actions(acts);
1200error:
1201 return error;
1202}
1203
1204static int ovs_flow_cmd_get(struct sk_buff *skb, struct genl_info *info)
1205{
1206 struct nlattr **a = info->attrs;
1207 struct ovs_header *ovs_header = info->userhdr;
1208 struct net *net = sock_net(skb->sk);
1209 struct sw_flow_key key;
1210 struct sk_buff *reply;
1211 struct sw_flow *flow;
1212 struct datapath *dp;
1213 struct sw_flow_match match;
1214 struct sw_flow_id ufid;
1215 u32 ufid_flags = ovs_nla_get_ufid_flags(a[OVS_FLOW_ATTR_UFID_FLAGS]);
1216 int err = 0;
1217 bool log = !a[OVS_FLOW_ATTR_PROBE];
1218 bool ufid_present;
1219
1220 ufid_present = ovs_nla_get_ufid(&ufid, a[OVS_FLOW_ATTR_UFID], log);
1221 if (a[OVS_FLOW_ATTR_KEY]) {
1222 ovs_match_init(&match, &key, NULL);
1223 err = ovs_nla_get_match(net, &match, a[OVS_FLOW_ATTR_KEY], NULL,
1224 log);
1225 } else if (!ufid_present) {
1226 OVS_NLERR(log,
1227 "Flow get message rejected, Key attribute missing.");
1228 err = -EINVAL;
1229 }
1230 if (err)
1231 return err;
1232
1233 ovs_lock();
1234 dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
1235 if (!dp) {
1236 err = -ENODEV;
1237 goto unlock;
1238 }
1239
1240 if (ufid_present)
1241 flow = ovs_flow_tbl_lookup_ufid(&dp->table, &ufid);
1242 else
1243 flow = ovs_flow_tbl_lookup_exact(&dp->table, &match);
1244 if (!flow) {
1245 err = -ENOENT;
1246 goto unlock;
1247 }
1248
1249 reply = ovs_flow_cmd_build_info(flow, ovs_header->dp_ifindex, info,
1250 OVS_FLOW_CMD_NEW, true, ufid_flags);
1251 if (IS_ERR(reply)) {
1252 err = PTR_ERR(reply);
1253 goto unlock;
1254 }
1255
1256 ovs_unlock();
1257 return genlmsg_reply(reply, info);
1258unlock:
1259 ovs_unlock();
1260 return err;
1261}
1262
1263static int ovs_flow_cmd_del(struct sk_buff *skb, struct genl_info *info)
1264{
1265 struct nlattr **a = info->attrs;
1266 struct ovs_header *ovs_header = info->userhdr;
1267 struct net *net = sock_net(skb->sk);
1268 struct sw_flow_key key;
1269 struct sk_buff *reply;
1270 struct sw_flow *flow = NULL;
1271 struct datapath *dp;
1272 struct sw_flow_match match;
1273 struct sw_flow_id ufid;
1274 u32 ufid_flags = ovs_nla_get_ufid_flags(a[OVS_FLOW_ATTR_UFID_FLAGS]);
1275 int err;
1276 bool log = !a[OVS_FLOW_ATTR_PROBE];
1277 bool ufid_present;
1278
1279 ufid_present = ovs_nla_get_ufid(&ufid, a[OVS_FLOW_ATTR_UFID], log);
1280 if (a[OVS_FLOW_ATTR_KEY]) {
1281 ovs_match_init(&match, &key, NULL);
1282 err = ovs_nla_get_match(net, &match, a[OVS_FLOW_ATTR_KEY],
1283 NULL, log);
1284 if (unlikely(err))
1285 return err;
1286 }
1287
1288 ovs_lock();
1289 dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
1290 if (unlikely(!dp)) {
1291 err = -ENODEV;
1292 goto unlock;
1293 }
1294
1295 if (unlikely(!a[OVS_FLOW_ATTR_KEY] && !ufid_present)) {
1296 err = ovs_flow_tbl_flush(&dp->table);
1297 goto unlock;
1298 }
1299
1300 if (ufid_present)
1301 flow = ovs_flow_tbl_lookup_ufid(&dp->table, &ufid);
1302 else
1303 flow = ovs_flow_tbl_lookup_exact(&dp->table, &match);
1304 if (unlikely(!flow)) {
1305 err = -ENOENT;
1306 goto unlock;
1307 }
1308
1309 ovs_flow_tbl_remove(&dp->table, flow);
1310 ovs_unlock();
1311
1312 reply = ovs_flow_cmd_alloc_info((const struct sw_flow_actions __force *) flow->sf_acts,
1313 &flow->id, info, false, ufid_flags);
1314 if (likely(reply)) {
1315 if (likely(!IS_ERR(reply))) {
1316 rcu_read_lock(); /*To keep RCU checker happy. */
1317 err = ovs_flow_cmd_fill_info(flow, ovs_header->dp_ifindex,
1318 reply, info->snd_portid,
1319 info->snd_seq, 0,
1320 OVS_FLOW_CMD_DEL,
1321 ufid_flags);
1322 rcu_read_unlock();
1323 BUG_ON(err < 0);
1324
1325 ovs_notify(&dp_flow_genl_family, reply, info);
1326 } else {
1327 netlink_set_err(sock_net(skb->sk)->genl_sock, 0, 0, PTR_ERR(reply));
1328 }
1329 }
1330
1331 ovs_flow_free(flow, true);
1332 return 0;
1333unlock:
1334 ovs_unlock();
1335 return err;
1336}
1337
1338static int ovs_flow_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb)
1339{
1340 struct nlattr *a[__OVS_FLOW_ATTR_MAX];
1341 struct ovs_header *ovs_header = genlmsg_data(nlmsg_data(cb->nlh));
1342 struct table_instance *ti;
1343 struct datapath *dp;
1344 u32 ufid_flags;
1345 int err;
1346
1347 err = genlmsg_parse(cb->nlh, &dp_flow_genl_family, a,
1348 OVS_FLOW_ATTR_MAX, flow_policy);
1349 if (err)
1350 return err;
1351 ufid_flags = ovs_nla_get_ufid_flags(a[OVS_FLOW_ATTR_UFID_FLAGS]);
1352
1353 rcu_read_lock();
1354 dp = get_dp_rcu(sock_net(skb->sk), ovs_header->dp_ifindex);
1355 if (!dp) {
1356 rcu_read_unlock();
1357 return -ENODEV;
1358 }
1359
1360 ti = rcu_dereference(dp->table.ti);
1361 for (;;) {
1362 struct sw_flow *flow;
1363 u32 bucket, obj;
1364
1365 bucket = cb->args[0];
1366 obj = cb->args[1];
1367 flow = ovs_flow_tbl_dump_next(ti, &bucket, &obj);
1368 if (!flow)
1369 break;
1370
1371 if (ovs_flow_cmd_fill_info(flow, ovs_header->dp_ifindex, skb,
1372 NETLINK_CB(cb->skb).portid,
1373 cb->nlh->nlmsg_seq, NLM_F_MULTI,
1374 OVS_FLOW_CMD_NEW, ufid_flags) < 0)
1375 break;
1376
1377 cb->args[0] = bucket;
1378 cb->args[1] = obj;
1379 }
1380 rcu_read_unlock();
1381 return skb->len;
1382}
1383
1384static const struct nla_policy flow_policy[OVS_FLOW_ATTR_MAX + 1] = {
1385 [OVS_FLOW_ATTR_KEY] = { .type = NLA_NESTED },
1386 [OVS_FLOW_ATTR_MASK] = { .type = NLA_NESTED },
1387 [OVS_FLOW_ATTR_ACTIONS] = { .type = NLA_NESTED },
1388 [OVS_FLOW_ATTR_CLEAR] = { .type = NLA_FLAG },
1389 [OVS_FLOW_ATTR_PROBE] = { .type = NLA_FLAG },
1390 [OVS_FLOW_ATTR_UFID] = { .type = NLA_UNSPEC, .len = 1 },
1391 [OVS_FLOW_ATTR_UFID_FLAGS] = { .type = NLA_U32 },
1392};
1393
1394static const struct genl_ops dp_flow_genl_ops[] = {
1395 { .cmd = OVS_FLOW_CMD_NEW,
1396 .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
1397 .policy = flow_policy,
1398 .doit = ovs_flow_cmd_new
1399 },
1400 { .cmd = OVS_FLOW_CMD_DEL,
1401 .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
1402 .policy = flow_policy,
1403 .doit = ovs_flow_cmd_del
1404 },
1405 { .cmd = OVS_FLOW_CMD_GET,
1406 .flags = 0, /* OK for unprivileged users. */
1407 .policy = flow_policy,
1408 .doit = ovs_flow_cmd_get,
1409 .dumpit = ovs_flow_cmd_dump
1410 },
1411 { .cmd = OVS_FLOW_CMD_SET,
1412 .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
1413 .policy = flow_policy,
1414 .doit = ovs_flow_cmd_set,
1415 },
1416};
1417
1418static struct genl_family dp_flow_genl_family = {
1419 .id = GENL_ID_GENERATE,
1420 .hdrsize = sizeof(struct ovs_header),
1421 .name = OVS_FLOW_FAMILY,
1422 .version = OVS_FLOW_VERSION,
1423 .maxattr = OVS_FLOW_ATTR_MAX,
1424 .netnsok = true,
1425 .parallel_ops = true,
1426 .ops = dp_flow_genl_ops,
1427 .n_ops = ARRAY_SIZE(dp_flow_genl_ops),
1428 .mcgrps = &ovs_dp_flow_multicast_group,
1429 .n_mcgrps = 1,
1430};
1431
1432static size_t ovs_dp_cmd_msg_size(void)
1433{
1434 size_t msgsize = NLMSG_ALIGN(sizeof(struct ovs_header));
1435
1436 msgsize += nla_total_size(IFNAMSIZ);
1437 msgsize += nla_total_size(sizeof(struct ovs_dp_stats));
1438 msgsize += nla_total_size(sizeof(struct ovs_dp_megaflow_stats));
1439 msgsize += nla_total_size(sizeof(u32)); /* OVS_DP_ATTR_USER_FEATURES */
1440
1441 return msgsize;
1442}
1443
1444/* Called with ovs_mutex. */
1445static int ovs_dp_cmd_fill_info(struct datapath *dp, struct sk_buff *skb,
1446 u32 portid, u32 seq, u32 flags, u8 cmd)
1447{
1448 struct ovs_header *ovs_header;
1449 struct ovs_dp_stats dp_stats;
1450 struct ovs_dp_megaflow_stats dp_megaflow_stats;
1451 int err;
1452
1453 ovs_header = genlmsg_put(skb, portid, seq, &dp_datapath_genl_family,
1454 flags, cmd);
1455 if (!ovs_header)
1456 goto error;
1457
1458 ovs_header->dp_ifindex = get_dpifindex(dp);
1459
1460 err = nla_put_string(skb, OVS_DP_ATTR_NAME, ovs_dp_name(dp));
1461 if (err)
1462 goto nla_put_failure;
1463
1464 get_dp_stats(dp, &dp_stats, &dp_megaflow_stats);
1465 if (nla_put(skb, OVS_DP_ATTR_STATS, sizeof(struct ovs_dp_stats),
1466 &dp_stats))
1467 goto nla_put_failure;
1468
1469 if (nla_put(skb, OVS_DP_ATTR_MEGAFLOW_STATS,
1470 sizeof(struct ovs_dp_megaflow_stats),
1471 &dp_megaflow_stats))
1472 goto nla_put_failure;
1473
1474 if (nla_put_u32(skb, OVS_DP_ATTR_USER_FEATURES, dp->user_features))
1475 goto nla_put_failure;
1476
1477 genlmsg_end(skb, ovs_header);
1478 return 0;
1479
1480nla_put_failure:
1481 genlmsg_cancel(skb, ovs_header);
1482error:
1483 return -EMSGSIZE;
1484}
1485
1486static struct sk_buff *ovs_dp_cmd_alloc_info(void)
1487{
1488 return genlmsg_new(ovs_dp_cmd_msg_size(), GFP_KERNEL);
1489}
1490
1491/* Called with rcu_read_lock or ovs_mutex. */
1492static struct datapath *lookup_datapath(struct net *net,
1493 const struct ovs_header *ovs_header,
1494 struct nlattr *a[OVS_DP_ATTR_MAX + 1])
1495{
1496 struct datapath *dp;
1497
1498 if (!a[OVS_DP_ATTR_NAME])
1499 dp = get_dp(net, ovs_header->dp_ifindex);
1500 else {
1501 struct vport *vport;
1502
1503 vport = ovs_vport_locate(net, nla_data(a[OVS_DP_ATTR_NAME]));
1504 dp = vport && vport->port_no == OVSP_LOCAL ? vport->dp : NULL;
1505 }
1506 return dp ? dp : ERR_PTR(-ENODEV);
1507}
1508
1509static void ovs_dp_reset_user_features(struct sk_buff *skb, struct genl_info *info)
1510{
1511 struct datapath *dp;
1512
1513 dp = lookup_datapath(sock_net(skb->sk), info->userhdr, info->attrs);
1514 if (IS_ERR(dp))
1515 return;
1516
1517 WARN(dp->user_features, "Dropping previously announced user features\n");
1518 dp->user_features = 0;
1519}
1520
1521static void ovs_dp_change(struct datapath *dp, struct nlattr *a[])
1522{
1523 if (a[OVS_DP_ATTR_USER_FEATURES])
1524 dp->user_features = nla_get_u32(a[OVS_DP_ATTR_USER_FEATURES]);
1525}
1526
1527static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info)
1528{
1529 struct nlattr **a = info->attrs;
1530 struct vport_parms parms;
1531 struct sk_buff *reply;
1532 struct datapath *dp;
1533 struct vport *vport;
1534 struct ovs_net *ovs_net;
1535 int err, i;
1536
1537 err = -EINVAL;
1538 if (!a[OVS_DP_ATTR_NAME] || !a[OVS_DP_ATTR_UPCALL_PID])
1539 goto err;
1540
1541 reply = ovs_dp_cmd_alloc_info();
1542 if (!reply)
1543 return -ENOMEM;
1544
1545 err = -ENOMEM;
1546 dp = kzalloc(sizeof(*dp), GFP_KERNEL);
1547 if (dp == NULL)
1548 goto err_free_reply;
1549
1550 ovs_dp_set_net(dp, sock_net(skb->sk));
1551
1552 /* Allocate table. */
1553 err = ovs_flow_tbl_init(&dp->table);
1554 if (err)
1555 goto err_free_dp;
1556
1557 dp->stats_percpu = netdev_alloc_pcpu_stats(struct dp_stats_percpu);
1558 if (!dp->stats_percpu) {
1559 err = -ENOMEM;
1560 goto err_destroy_table;
1561 }
1562
1563 dp->ports = kmalloc(DP_VPORT_HASH_BUCKETS * sizeof(struct hlist_head),
1564 GFP_KERNEL);
1565 if (!dp->ports) {
1566 err = -ENOMEM;
1567 goto err_destroy_percpu;
1568 }
1569
1570 for (i = 0; i < DP_VPORT_HASH_BUCKETS; i++)
1571 INIT_HLIST_HEAD(&dp->ports[i]);
1572
1573 /* Set up our datapath device. */
1574 parms.name = nla_data(a[OVS_DP_ATTR_NAME]);
1575 parms.type = OVS_VPORT_TYPE_INTERNAL;
1576 parms.options = NULL;
1577 parms.dp = dp;
1578 parms.port_no = OVSP_LOCAL;
1579 parms.upcall_portids = a[OVS_DP_ATTR_UPCALL_PID];
1580
1581 ovs_dp_change(dp, a);
1582
1583 /* So far only local changes have been made, now need the lock. */
1584 ovs_lock();
1585
1586 vport = new_vport(&parms);
1587 if (IS_ERR(vport)) {
1588 err = PTR_ERR(vport);
1589 if (err == -EBUSY)
1590 err = -EEXIST;
1591
1592 if (err == -EEXIST) {
1593 /* An outdated user space instance that does not understand
1594 * the concept of user_features has attempted to create a new
1595 * datapath and is likely to reuse it. Drop all user features.
1596 */
1597 if (info->genlhdr->version < OVS_DP_VER_FEATURES)
1598 ovs_dp_reset_user_features(skb, info);
1599 }
1600
1601 goto err_destroy_ports_array;
1602 }
1603
1604 err = ovs_dp_cmd_fill_info(dp, reply, info->snd_portid,
1605 info->snd_seq, 0, OVS_DP_CMD_NEW);
1606 BUG_ON(err < 0);
1607
1608 ovs_net = net_generic(ovs_dp_get_net(dp), ovs_net_id);
1609 list_add_tail_rcu(&dp->list_node, &ovs_net->dps);
1610
1611 ovs_unlock();
1612
1613 ovs_notify(&dp_datapath_genl_family, reply, info);
1614 return 0;
1615
1616err_destroy_ports_array:
1617 ovs_unlock();
1618 kfree(dp->ports);
1619err_destroy_percpu:
1620 free_percpu(dp->stats_percpu);
1621err_destroy_table:
1622 ovs_flow_tbl_destroy(&dp->table);
1623err_free_dp:
1624 kfree(dp);
1625err_free_reply:
1626 kfree_skb(reply);
1627err:
1628 return err;
1629}
1630
1631/* Called with ovs_mutex. */
1632static void __dp_destroy(struct datapath *dp)
1633{
1634 int i;
1635
1636 for (i = 0; i < DP_VPORT_HASH_BUCKETS; i++) {
1637 struct vport *vport;
1638 struct hlist_node *n;
1639
1640 hlist_for_each_entry_safe(vport, n, &dp->ports[i], dp_hash_node)
1641 if (vport->port_no != OVSP_LOCAL)
1642 ovs_dp_detach_port(vport);
1643 }
1644
1645 list_del_rcu(&dp->list_node);
1646
1647 /* OVSP_LOCAL is datapath internal port. We need to make sure that
1648 * all ports in datapath are destroyed first before freeing datapath.
1649 */
1650 ovs_dp_detach_port(ovs_vport_ovsl(dp, OVSP_LOCAL));
1651
1652 /* RCU destroy the flow table */
1653 call_rcu(&dp->rcu, destroy_dp_rcu);
1654}
1655
1656static int ovs_dp_cmd_del(struct sk_buff *skb, struct genl_info *info)
1657{
1658 struct sk_buff *reply;
1659 struct datapath *dp;
1660 int err;
1661
1662 reply = ovs_dp_cmd_alloc_info();
1663 if (!reply)
1664 return -ENOMEM;
1665
1666 ovs_lock();
1667 dp = lookup_datapath(sock_net(skb->sk), info->userhdr, info->attrs);
1668 err = PTR_ERR(dp);
1669 if (IS_ERR(dp))
1670 goto err_unlock_free;
1671
1672 err = ovs_dp_cmd_fill_info(dp, reply, info->snd_portid,
1673 info->snd_seq, 0, OVS_DP_CMD_DEL);
1674 BUG_ON(err < 0);
1675
1676 __dp_destroy(dp);
1677 ovs_unlock();
1678
1679 ovs_notify(&dp_datapath_genl_family, reply, info);
1680
1681 return 0;
1682
1683err_unlock_free:
1684 ovs_unlock();
1685 kfree_skb(reply);
1686 return err;
1687}
1688
1689static int ovs_dp_cmd_set(struct sk_buff *skb, struct genl_info *info)
1690{
1691 struct sk_buff *reply;
1692 struct datapath *dp;
1693 int err;
1694
1695 reply = ovs_dp_cmd_alloc_info();
1696 if (!reply)
1697 return -ENOMEM;
1698
1699 ovs_lock();
1700 dp = lookup_datapath(sock_net(skb->sk), info->userhdr, info->attrs);
1701 err = PTR_ERR(dp);
1702 if (IS_ERR(dp))
1703 goto err_unlock_free;
1704
1705 ovs_dp_change(dp, info->attrs);
1706
1707 err = ovs_dp_cmd_fill_info(dp, reply, info->snd_portid,
1708 info->snd_seq, 0, OVS_DP_CMD_NEW);
1709 BUG_ON(err < 0);
1710
1711 ovs_unlock();
1712 ovs_notify(&dp_datapath_genl_family, reply, info);
1713
1714 return 0;
1715
1716err_unlock_free:
1717 ovs_unlock();
1718 kfree_skb(reply);
1719 return err;
1720}
1721
1722static int ovs_dp_cmd_get(struct sk_buff *skb, struct genl_info *info)
1723{
1724 struct sk_buff *reply;
1725 struct datapath *dp;
1726 int err;
1727
1728 reply = ovs_dp_cmd_alloc_info();
1729 if (!reply)
1730 return -ENOMEM;
1731
1732 ovs_lock();
1733 dp = lookup_datapath(sock_net(skb->sk), info->userhdr, info->attrs);
1734 if (IS_ERR(dp)) {
1735 err = PTR_ERR(dp);
1736 goto err_unlock_free;
1737 }
1738 err = ovs_dp_cmd_fill_info(dp, reply, info->snd_portid,
1739 info->snd_seq, 0, OVS_DP_CMD_NEW);
1740 BUG_ON(err < 0);
1741 ovs_unlock();
1742
1743 return genlmsg_reply(reply, info);
1744
1745err_unlock_free:
1746 ovs_unlock();
1747 kfree_skb(reply);
1748 return err;
1749}
1750
1751static int ovs_dp_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb)
1752{
1753 struct ovs_net *ovs_net = net_generic(sock_net(skb->sk), ovs_net_id);
1754 struct datapath *dp;
1755 int skip = cb->args[0];
1756 int i = 0;
1757
1758 ovs_lock();
1759 list_for_each_entry(dp, &ovs_net->dps, list_node) {
1760 if (i >= skip &&
1761 ovs_dp_cmd_fill_info(dp, skb, NETLINK_CB(cb->skb).portid,
1762 cb->nlh->nlmsg_seq, NLM_F_MULTI,
1763 OVS_DP_CMD_NEW) < 0)
1764 break;
1765 i++;
1766 }
1767 ovs_unlock();
1768
1769 cb->args[0] = i;
1770
1771 return skb->len;
1772}
1773
1774static const struct nla_policy datapath_policy[OVS_DP_ATTR_MAX + 1] = {
1775 [OVS_DP_ATTR_NAME] = { .type = NLA_NUL_STRING, .len = IFNAMSIZ - 1 },
1776 [OVS_DP_ATTR_UPCALL_PID] = { .type = NLA_U32 },
1777 [OVS_DP_ATTR_USER_FEATURES] = { .type = NLA_U32 },
1778};
1779
1780static const struct genl_ops dp_datapath_genl_ops[] = {
1781 { .cmd = OVS_DP_CMD_NEW,
1782 .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
1783 .policy = datapath_policy,
1784 .doit = ovs_dp_cmd_new
1785 },
1786 { .cmd = OVS_DP_CMD_DEL,
1787 .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
1788 .policy = datapath_policy,
1789 .doit = ovs_dp_cmd_del
1790 },
1791 { .cmd = OVS_DP_CMD_GET,
1792 .flags = 0, /* OK for unprivileged users. */
1793 .policy = datapath_policy,
1794 .doit = ovs_dp_cmd_get,
1795 .dumpit = ovs_dp_cmd_dump
1796 },
1797 { .cmd = OVS_DP_CMD_SET,
1798 .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
1799 .policy = datapath_policy,
1800 .doit = ovs_dp_cmd_set,
1801 },
1802};
1803
1804static struct genl_family dp_datapath_genl_family = {
1805 .id = GENL_ID_GENERATE,
1806 .hdrsize = sizeof(struct ovs_header),
1807 .name = OVS_DATAPATH_FAMILY,
1808 .version = OVS_DATAPATH_VERSION,
1809 .maxattr = OVS_DP_ATTR_MAX,
1810 .netnsok = true,
1811 .parallel_ops = true,
1812 .ops = dp_datapath_genl_ops,
1813 .n_ops = ARRAY_SIZE(dp_datapath_genl_ops),
1814 .mcgrps = &ovs_dp_datapath_multicast_group,
1815 .n_mcgrps = 1,
1816};
1817
1818/* Called with ovs_mutex or RCU read lock. */
1819static int ovs_vport_cmd_fill_info(struct vport *vport, struct sk_buff *skb,
1820 u32 portid, u32 seq, u32 flags, u8 cmd)
1821{
1822 struct ovs_header *ovs_header;
1823 struct ovs_vport_stats vport_stats;
1824 int err;
1825
1826 ovs_header = genlmsg_put(skb, portid, seq, &dp_vport_genl_family,
1827 flags, cmd);
1828 if (!ovs_header)
1829 return -EMSGSIZE;
1830
1831 ovs_header->dp_ifindex = get_dpifindex(vport->dp);
1832
1833 if (nla_put_u32(skb, OVS_VPORT_ATTR_PORT_NO, vport->port_no) ||
1834 nla_put_u32(skb, OVS_VPORT_ATTR_TYPE, vport->ops->type) ||
1835 nla_put_string(skb, OVS_VPORT_ATTR_NAME,
1836 ovs_vport_name(vport)))
1837 goto nla_put_failure;
1838
1839 ovs_vport_get_stats(vport, &vport_stats);
1840 if (nla_put(skb, OVS_VPORT_ATTR_STATS, sizeof(struct ovs_vport_stats),
1841 &vport_stats))
1842 goto nla_put_failure;
1843
1844 if (ovs_vport_get_upcall_portids(vport, skb))
1845 goto nla_put_failure;
1846
1847 err = ovs_vport_get_options(vport, skb);
1848 if (err == -EMSGSIZE)
1849 goto error;
1850
1851 genlmsg_end(skb, ovs_header);
1852 return 0;
1853
1854nla_put_failure:
1855 err = -EMSGSIZE;
1856error:
1857 genlmsg_cancel(skb, ovs_header);
1858 return err;
1859}
1860
1861static struct sk_buff *ovs_vport_cmd_alloc_info(void)
1862{
1863 return nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
1864}
1865
1866/* Called with ovs_mutex, only via ovs_dp_notify_wq(). */
1867struct sk_buff *ovs_vport_cmd_build_info(struct vport *vport, u32 portid,
1868 u32 seq, u8 cmd)
1869{
1870 struct sk_buff *skb;
1871 int retval;
1872
1873 skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_ATOMIC);
1874 if (!skb)
1875 return ERR_PTR(-ENOMEM);
1876
1877 retval = ovs_vport_cmd_fill_info(vport, skb, portid, seq, 0, cmd);
1878 BUG_ON(retval < 0);
1879
1880 return skb;
1881}
1882
1883/* Called with ovs_mutex or RCU read lock. */
1884static struct vport *lookup_vport(struct net *net,
1885 const struct ovs_header *ovs_header,
1886 struct nlattr *a[OVS_VPORT_ATTR_MAX + 1])
1887{
1888 struct datapath *dp;
1889 struct vport *vport;
1890
1891 if (a[OVS_VPORT_ATTR_NAME]) {
1892 vport = ovs_vport_locate(net, nla_data(a[OVS_VPORT_ATTR_NAME]));
1893 if (!vport)
1894 return ERR_PTR(-ENODEV);
1895 if (ovs_header->dp_ifindex &&
1896 ovs_header->dp_ifindex != get_dpifindex(vport->dp))
1897 return ERR_PTR(-ENODEV);
1898 return vport;
1899 } else if (a[OVS_VPORT_ATTR_PORT_NO]) {
1900 u32 port_no = nla_get_u32(a[OVS_VPORT_ATTR_PORT_NO]);
1901
1902 if (port_no >= DP_MAX_PORTS)
1903 return ERR_PTR(-EFBIG);
1904
1905 dp = get_dp(net, ovs_header->dp_ifindex);
1906 if (!dp)
1907 return ERR_PTR(-ENODEV);
1908
1909 vport = ovs_vport_ovsl_rcu(dp, port_no);
1910 if (!vport)
1911 return ERR_PTR(-ENODEV);
1912 return vport;
1913 } else
1914 return ERR_PTR(-EINVAL);
1915}
1916
1917/* Called with ovs_mutex */
1918static void update_headroom(struct datapath *dp)
1919{
1920 unsigned dev_headroom, max_headroom = 0;
1921 struct net_device *dev;
1922 struct vport *vport;
1923 int i;
1924
1925 for (i = 0; i < DP_VPORT_HASH_BUCKETS; i++) {
1926 hlist_for_each_entry_rcu(vport, &dp->ports[i], dp_hash_node) {
1927 dev = vport->dev;
1928 dev_headroom = netdev_get_fwd_headroom(dev);
1929 if (dev_headroom > max_headroom)
1930 max_headroom = dev_headroom;
1931 }
1932 }
1933
1934 dp->max_headroom = max_headroom;
1935 for (i = 0; i < DP_VPORT_HASH_BUCKETS; i++)
1936 hlist_for_each_entry_rcu(vport, &dp->ports[i], dp_hash_node)
1937 netdev_set_rx_headroom(vport->dev, max_headroom);
1938}
1939
1940static int ovs_vport_cmd_new(struct sk_buff *skb, struct genl_info *info)
1941{
1942 struct nlattr **a = info->attrs;
1943 struct ovs_header *ovs_header = info->userhdr;
1944 struct vport_parms parms;
1945 struct sk_buff *reply;
1946 struct vport *vport;
1947 struct datapath *dp;
1948 u32 port_no;
1949 int err;
1950
1951 if (!a[OVS_VPORT_ATTR_NAME] || !a[OVS_VPORT_ATTR_TYPE] ||
1952 !a[OVS_VPORT_ATTR_UPCALL_PID])
1953 return -EINVAL;
1954
1955 port_no = a[OVS_VPORT_ATTR_PORT_NO]
1956 ? nla_get_u32(a[OVS_VPORT_ATTR_PORT_NO]) : 0;
1957 if (port_no >= DP_MAX_PORTS)
1958 return -EFBIG;
1959
1960 reply = ovs_vport_cmd_alloc_info();
1961 if (!reply)
1962 return -ENOMEM;
1963
1964 ovs_lock();
1965restart:
1966 dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
1967 err = -ENODEV;
1968 if (!dp)
1969 goto exit_unlock_free;
1970
1971 if (port_no) {
1972 vport = ovs_vport_ovsl(dp, port_no);
1973 err = -EBUSY;
1974 if (vport)
1975 goto exit_unlock_free;
1976 } else {
1977 for (port_no = 1; ; port_no++) {
1978 if (port_no >= DP_MAX_PORTS) {
1979 err = -EFBIG;
1980 goto exit_unlock_free;
1981 }
1982 vport = ovs_vport_ovsl(dp, port_no);
1983 if (!vport)
1984 break;
1985 }
1986 }
1987
1988 parms.name = nla_data(a[OVS_VPORT_ATTR_NAME]);
1989 parms.type = nla_get_u32(a[OVS_VPORT_ATTR_TYPE]);
1990 parms.options = a[OVS_VPORT_ATTR_OPTIONS];
1991 parms.dp = dp;
1992 parms.port_no = port_no;
1993 parms.upcall_portids = a[OVS_VPORT_ATTR_UPCALL_PID];
1994
1995 vport = new_vport(&parms);
1996 err = PTR_ERR(vport);
1997 if (IS_ERR(vport)) {
1998 if (err == -EAGAIN)
1999 goto restart;
2000 goto exit_unlock_free;
2001 }
2002
2003 err = ovs_vport_cmd_fill_info(vport, reply, info->snd_portid,
2004 info->snd_seq, 0, OVS_VPORT_CMD_NEW);
2005
2006 if (netdev_get_fwd_headroom(vport->dev) > dp->max_headroom)
2007 update_headroom(dp);
2008 else
2009 netdev_set_rx_headroom(vport->dev, dp->max_headroom);
2010
2011 BUG_ON(err < 0);
2012 ovs_unlock();
2013
2014 ovs_notify(&dp_vport_genl_family, reply, info);
2015 return 0;
2016
2017exit_unlock_free:
2018 ovs_unlock();
2019 kfree_skb(reply);
2020 return err;
2021}
2022
2023static int ovs_vport_cmd_set(struct sk_buff *skb, struct genl_info *info)
2024{
2025 struct nlattr **a = info->attrs;
2026 struct sk_buff *reply;
2027 struct vport *vport;
2028 int err;
2029
2030 reply = ovs_vport_cmd_alloc_info();
2031 if (!reply)
2032 return -ENOMEM;
2033
2034 ovs_lock();
2035 vport = lookup_vport(sock_net(skb->sk), info->userhdr, a);
2036 err = PTR_ERR(vport);
2037 if (IS_ERR(vport))
2038 goto exit_unlock_free;
2039
2040 if (a[OVS_VPORT_ATTR_TYPE] &&
2041 nla_get_u32(a[OVS_VPORT_ATTR_TYPE]) != vport->ops->type) {
2042 err = -EINVAL;
2043 goto exit_unlock_free;
2044 }
2045
2046 if (a[OVS_VPORT_ATTR_OPTIONS]) {
2047 err = ovs_vport_set_options(vport, a[OVS_VPORT_ATTR_OPTIONS]);
2048 if (err)
2049 goto exit_unlock_free;
2050 }
2051
2052
2053 if (a[OVS_VPORT_ATTR_UPCALL_PID]) {
2054 struct nlattr *ids = a[OVS_VPORT_ATTR_UPCALL_PID];
2055
2056 err = ovs_vport_set_upcall_portids(vport, ids);
2057 if (err)
2058 goto exit_unlock_free;
2059 }
2060
2061 err = ovs_vport_cmd_fill_info(vport, reply, info->snd_portid,
2062 info->snd_seq, 0, OVS_VPORT_CMD_NEW);
2063 BUG_ON(err < 0);
2064
2065 ovs_unlock();
2066 ovs_notify(&dp_vport_genl_family, reply, info);
2067 return 0;
2068
2069exit_unlock_free:
2070 ovs_unlock();
2071 kfree_skb(reply);
2072 return err;
2073}
2074
2075static int ovs_vport_cmd_del(struct sk_buff *skb, struct genl_info *info)
2076{
2077 bool must_update_headroom = false;
2078 struct nlattr **a = info->attrs;
2079 struct sk_buff *reply;
2080 struct datapath *dp;
2081 struct vport *vport;
2082 int err;
2083
2084 reply = ovs_vport_cmd_alloc_info();
2085 if (!reply)
2086 return -ENOMEM;
2087
2088 ovs_lock();
2089 vport = lookup_vport(sock_net(skb->sk), info->userhdr, a);
2090 err = PTR_ERR(vport);
2091 if (IS_ERR(vport))
2092 goto exit_unlock_free;
2093
2094 if (vport->port_no == OVSP_LOCAL) {
2095 err = -EINVAL;
2096 goto exit_unlock_free;
2097 }
2098
2099 err = ovs_vport_cmd_fill_info(vport, reply, info->snd_portid,
2100 info->snd_seq, 0, OVS_VPORT_CMD_DEL);
2101 BUG_ON(err < 0);
2102
2103 /* the vport deletion may trigger dp headroom update */
2104 dp = vport->dp;
2105 if (netdev_get_fwd_headroom(vport->dev) == dp->max_headroom)
2106 must_update_headroom = true;
2107 netdev_reset_rx_headroom(vport->dev);
2108 ovs_dp_detach_port(vport);
2109
2110 if (must_update_headroom)
2111 update_headroom(dp);
2112 ovs_unlock();
2113
2114 ovs_notify(&dp_vport_genl_family, reply, info);
2115 return 0;
2116
2117exit_unlock_free:
2118 ovs_unlock();
2119 kfree_skb(reply);
2120 return err;
2121}
2122
2123static int ovs_vport_cmd_get(struct sk_buff *skb, struct genl_info *info)
2124{
2125 struct nlattr **a = info->attrs;
2126 struct ovs_header *ovs_header = info->userhdr;
2127 struct sk_buff *reply;
2128 struct vport *vport;
2129 int err;
2130
2131 reply = ovs_vport_cmd_alloc_info();
2132 if (!reply)
2133 return -ENOMEM;
2134
2135 rcu_read_lock();
2136 vport = lookup_vport(sock_net(skb->sk), ovs_header, a);
2137 err = PTR_ERR(vport);
2138 if (IS_ERR(vport))
2139 goto exit_unlock_free;
2140 err = ovs_vport_cmd_fill_info(vport, reply, info->snd_portid,
2141 info->snd_seq, 0, OVS_VPORT_CMD_NEW);
2142 BUG_ON(err < 0);
2143 rcu_read_unlock();
2144
2145 return genlmsg_reply(reply, info);
2146
2147exit_unlock_free:
2148 rcu_read_unlock();
2149 kfree_skb(reply);
2150 return err;
2151}
2152
2153static int ovs_vport_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb)
2154{
2155 struct ovs_header *ovs_header = genlmsg_data(nlmsg_data(cb->nlh));
2156 struct datapath *dp;
2157 int bucket = cb->args[0], skip = cb->args[1];
2158 int i, j = 0;
2159
2160 rcu_read_lock();
2161 dp = get_dp_rcu(sock_net(skb->sk), ovs_header->dp_ifindex);
2162 if (!dp) {
2163 rcu_read_unlock();
2164 return -ENODEV;
2165 }
2166 for (i = bucket; i < DP_VPORT_HASH_BUCKETS; i++) {
2167 struct vport *vport;
2168
2169 j = 0;
2170 hlist_for_each_entry_rcu(vport, &dp->ports[i], dp_hash_node) {
2171 if (j >= skip &&
2172 ovs_vport_cmd_fill_info(vport, skb,
2173 NETLINK_CB(cb->skb).portid,
2174 cb->nlh->nlmsg_seq,
2175 NLM_F_MULTI,
2176 OVS_VPORT_CMD_NEW) < 0)
2177 goto out;
2178
2179 j++;
2180 }
2181 skip = 0;
2182 }
2183out:
2184 rcu_read_unlock();
2185
2186 cb->args[0] = i;
2187 cb->args[1] = j;
2188
2189 return skb->len;
2190}
2191
2192static const struct nla_policy vport_policy[OVS_VPORT_ATTR_MAX + 1] = {
2193 [OVS_VPORT_ATTR_NAME] = { .type = NLA_NUL_STRING, .len = IFNAMSIZ - 1 },
2194 [OVS_VPORT_ATTR_STATS] = { .len = sizeof(struct ovs_vport_stats) },
2195 [OVS_VPORT_ATTR_PORT_NO] = { .type = NLA_U32 },
2196 [OVS_VPORT_ATTR_TYPE] = { .type = NLA_U32 },
2197 [OVS_VPORT_ATTR_UPCALL_PID] = { .type = NLA_U32 },
2198 [OVS_VPORT_ATTR_OPTIONS] = { .type = NLA_NESTED },
2199};
2200
2201static const struct genl_ops dp_vport_genl_ops[] = {
2202 { .cmd = OVS_VPORT_CMD_NEW,
2203 .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
2204 .policy = vport_policy,
2205 .doit = ovs_vport_cmd_new
2206 },
2207 { .cmd = OVS_VPORT_CMD_DEL,
2208 .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
2209 .policy = vport_policy,
2210 .doit = ovs_vport_cmd_del
2211 },
2212 { .cmd = OVS_VPORT_CMD_GET,
2213 .flags = 0, /* OK for unprivileged users. */
2214 .policy = vport_policy,
2215 .doit = ovs_vport_cmd_get,
2216 .dumpit = ovs_vport_cmd_dump
2217 },
2218 { .cmd = OVS_VPORT_CMD_SET,
2219 .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
2220 .policy = vport_policy,
2221 .doit = ovs_vport_cmd_set,
2222 },
2223};
2224
2225struct genl_family dp_vport_genl_family = {
2226 .id = GENL_ID_GENERATE,
2227 .hdrsize = sizeof(struct ovs_header),
2228 .name = OVS_VPORT_FAMILY,
2229 .version = OVS_VPORT_VERSION,
2230 .maxattr = OVS_VPORT_ATTR_MAX,
2231 .netnsok = true,
2232 .parallel_ops = true,
2233 .ops = dp_vport_genl_ops,
2234 .n_ops = ARRAY_SIZE(dp_vport_genl_ops),
2235 .mcgrps = &ovs_dp_vport_multicast_group,
2236 .n_mcgrps = 1,
2237};
2238
2239static struct genl_family * const dp_genl_families[] = {
2240 &dp_datapath_genl_family,
2241 &dp_vport_genl_family,
2242 &dp_flow_genl_family,
2243 &dp_packet_genl_family,
2244};
2245
2246static void dp_unregister_genl(int n_families)
2247{
2248 int i;
2249
2250 for (i = 0; i < n_families; i++)
2251 genl_unregister_family(dp_genl_families[i]);
2252}
2253
2254static int dp_register_genl(void)
2255{
2256 int err;
2257 int i;
2258
2259 for (i = 0; i < ARRAY_SIZE(dp_genl_families); i++) {
2260
2261 err = genl_register_family(dp_genl_families[i]);
2262 if (err)
2263 goto error;
2264 }
2265
2266 return 0;
2267
2268error:
2269 dp_unregister_genl(i);
2270 return err;
2271}
2272
2273static int __net_init ovs_init_net(struct net *net)
2274{
2275 struct ovs_net *ovs_net = net_generic(net, ovs_net_id);
2276
2277 INIT_LIST_HEAD(&ovs_net->dps);
2278 INIT_WORK(&ovs_net->dp_notify_work, ovs_dp_notify_wq);
2279 ovs_ct_init(net);
2280 return 0;
2281}
2282
2283static void __net_exit list_vports_from_net(struct net *net, struct net *dnet,
2284 struct list_head *head)
2285{
2286 struct ovs_net *ovs_net = net_generic(net, ovs_net_id);
2287 struct datapath *dp;
2288
2289 list_for_each_entry(dp, &ovs_net->dps, list_node) {
2290 int i;
2291
2292 for (i = 0; i < DP_VPORT_HASH_BUCKETS; i++) {
2293 struct vport *vport;
2294
2295 hlist_for_each_entry(vport, &dp->ports[i], dp_hash_node) {
2296 if (vport->ops->type != OVS_VPORT_TYPE_INTERNAL)
2297 continue;
2298
2299 if (dev_net(vport->dev) == dnet)
2300 list_add(&vport->detach_list, head);
2301 }
2302 }
2303 }
2304}
2305
2306static void __net_exit ovs_exit_net(struct net *dnet)
2307{
2308 struct datapath *dp, *dp_next;
2309 struct ovs_net *ovs_net = net_generic(dnet, ovs_net_id);
2310 struct vport *vport, *vport_next;
2311 struct net *net;
2312 LIST_HEAD(head);
2313
2314 ovs_ct_exit(dnet);
2315 ovs_lock();
2316 list_for_each_entry_safe(dp, dp_next, &ovs_net->dps, list_node)
2317 __dp_destroy(dp);
2318
2319 rtnl_lock();
2320 for_each_net(net)
2321 list_vports_from_net(net, dnet, &head);
2322 rtnl_unlock();
2323
2324 /* Detach all vports from given namespace. */
2325 list_for_each_entry_safe(vport, vport_next, &head, detach_list) {
2326 list_del(&vport->detach_list);
2327 ovs_dp_detach_port(vport);
2328 }
2329
2330 ovs_unlock();
2331
2332 cancel_work_sync(&ovs_net->dp_notify_work);
2333}
2334
2335static struct pernet_operations ovs_net_ops = {
2336 .init = ovs_init_net,
2337 .exit = ovs_exit_net,
2338 .id = &ovs_net_id,
2339 .size = sizeof(struct ovs_net),
2340};
2341
2342static int __init dp_init(void)
2343{
2344 int err;
2345
2346 BUILD_BUG_ON(sizeof(struct ovs_skb_cb) > FIELD_SIZEOF(struct sk_buff, cb));
2347
2348 pr_info("Open vSwitch switching datapath\n");
2349
2350 err = action_fifos_init();
2351 if (err)
2352 goto error;
2353
2354 err = ovs_internal_dev_rtnl_link_register();
2355 if (err)
2356 goto error_action_fifos_exit;
2357
2358 err = ovs_flow_init();
2359 if (err)
2360 goto error_unreg_rtnl_link;
2361
2362 err = ovs_vport_init();
2363 if (err)
2364 goto error_flow_exit;
2365
2366 err = register_pernet_device(&ovs_net_ops);
2367 if (err)
2368 goto error_vport_exit;
2369
2370 err = register_netdevice_notifier(&ovs_dp_device_notifier);
2371 if (err)
2372 goto error_netns_exit;
2373
2374 err = ovs_netdev_init();
2375 if (err)
2376 goto error_unreg_notifier;
2377
2378 err = dp_register_genl();
2379 if (err < 0)
2380 goto error_unreg_netdev;
2381
2382 return 0;
2383
2384error_unreg_netdev:
2385 ovs_netdev_exit();
2386error_unreg_notifier:
2387 unregister_netdevice_notifier(&ovs_dp_device_notifier);
2388error_netns_exit:
2389 unregister_pernet_device(&ovs_net_ops);
2390error_vport_exit:
2391 ovs_vport_exit();
2392error_flow_exit:
2393 ovs_flow_exit();
2394error_unreg_rtnl_link:
2395 ovs_internal_dev_rtnl_link_unregister();
2396error_action_fifos_exit:
2397 action_fifos_exit();
2398error:
2399 return err;
2400}
2401
2402static void dp_cleanup(void)
2403{
2404 dp_unregister_genl(ARRAY_SIZE(dp_genl_families));
2405 ovs_netdev_exit();
2406 unregister_netdevice_notifier(&ovs_dp_device_notifier);
2407 unregister_pernet_device(&ovs_net_ops);
2408 rcu_barrier();
2409 ovs_vport_exit();
2410 ovs_flow_exit();
2411 ovs_internal_dev_rtnl_link_unregister();
2412 action_fifos_exit();
2413}
2414
2415module_init(dp_init);
2416module_exit(dp_cleanup);
2417
2418MODULE_DESCRIPTION("Open vSwitch switching datapath");
2419MODULE_LICENSE("GPL");