Linux Audio

Check our new training course

Loading...
v3.1
  1/*
  2 * Copyright (c) 2005 Voltaire Inc.  All rights reserved.
  3 * Copyright (c) 2002-2005, Network Appliance, Inc. All rights reserved.
  4 * Copyright (c) 1999-2005, Mellanox Technologies, Inc. All rights reserved.
  5 * Copyright (c) 2005 Intel Corporation.  All rights reserved.
  6 *
  7 * This software is available to you under a choice of one of two
  8 * licenses.  You may choose to be licensed under the terms of the GNU
  9 * General Public License (GPL) Version 2, available from the file
 10 * COPYING in the main directory of this source tree, or the
 11 * OpenIB.org BSD license below:
 12 *
 13 *     Redistribution and use in source and binary forms, with or
 14 *     without modification, are permitted provided that the following
 15 *     conditions are met:
 16 *
 17 *      - Redistributions of source code must retain the above
 18 *        copyright notice, this list of conditions and the following
 19 *        disclaimer.
 20 *
 21 *      - Redistributions in binary form must reproduce the above
 22 *        copyright notice, this list of conditions and the following
 23 *        disclaimer in the documentation and/or other materials
 24 *        provided with the distribution.
 25 *
 26 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 27 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 28 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 29 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 30 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 31 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 32 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 33 * SOFTWARE.
 34 */
 35
 36#include <linux/mutex.h>
 37#include <linux/inetdevice.h>
 38#include <linux/slab.h>
 39#include <linux/workqueue.h>
 
 40#include <net/arp.h>
 41#include <net/neighbour.h>
 42#include <net/route.h>
 43#include <net/netevent.h>
 44#include <net/addrconf.h>
 45#include <net/ip6_route.h>
 46#include <rdma/ib_addr.h>
 
 
 
 47
/* Module metadata: author, one-line description and licence tag. */
 48MODULE_AUTHOR("Sean Hefty");
 49MODULE_DESCRIPTION("IB Address Translation");
 50MODULE_LICENSE("Dual BSD/GPL");
 51
/*
 * One queued address-resolution request.  Allocated by rdma_resolve_ip(),
 * kept on req_list in timeout order by queue_req(), and freed by
 * process_req() once the callback has been invoked.
 */
 52struct addr_req {
 53	struct list_head list;			/* link on req_list, later on the worker's done list */
 54	struct sockaddr_storage src_addr;	/* private copy of the source address */
 55	struct sockaddr_storage dst_addr;	/* private copy of the destination address */
 56	struct rdma_dev_addr *addr;		/* caller's result buffer; also the key for rdma_addr_cancel() */
 57	struct rdma_addr_client *client;	/* client holding a reference for this request */
 58	void *context;				/* opaque value handed back to callback */
 59	void (*callback)(int status, struct sockaddr *src_addr,
 60			 struct rdma_dev_addr *addr, void *context);
 61	unsigned long timeout;			/* deadline, in jiffies */
 
 62	int status;				/* -ENODATA while unresolved, final result otherwise */
 
 63};
 64
 
 
/* Forward declaration: needed by DECLARE_DELAYED_WORK() below. */
 65static void process_req(struct work_struct *work);
 66
 67static DEFINE_MUTEX(lock);			/* taken around every req_list access in this file */
 68static LIST_HEAD(req_list);			/* pending requests, kept sorted by timeout in queue_req() */
 69static DECLARE_DELAYED_WORK(work, process_req);	/* (re)armed by set_timeout() */
 70static struct workqueue_struct *addr_wq;	/* created in addr_init(), destroyed in addr_cleanup() */
 71
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 72void rdma_addr_register_client(struct rdma_addr_client *client)
 73{
 74	atomic_set(&client->refcount, 1);
 75	init_completion(&client->comp);
 76}
 77EXPORT_SYMBOL(rdma_addr_register_client);
 78
 79static inline void put_client(struct rdma_addr_client *client)
 80{
 81	if (atomic_dec_and_test(&client->refcount))
 82		complete(&client->comp);
 83}
 84
 85void rdma_addr_unregister_client(struct rdma_addr_client *client)
 86{
 87	put_client(client);
 88	wait_for_completion(&client->comp);
 89}
 90EXPORT_SYMBOL(rdma_addr_unregister_client);
 91
 92int rdma_copy_addr(struct rdma_dev_addr *dev_addr, struct net_device *dev,
 93		     const unsigned char *dst_dev_addr)
 
 94{
 95	dev_addr->dev_type = dev->type;
 96	memcpy(dev_addr->src_dev_addr, dev->dev_addr, MAX_ADDR_LEN);
 97	memcpy(dev_addr->broadcast, dev->broadcast, MAX_ADDR_LEN);
 98	if (dst_dev_addr)
 99		memcpy(dev_addr->dst_dev_addr, dst_dev_addr, MAX_ADDR_LEN);
100	dev_addr->bound_dev_if = dev->ifindex;
101	return 0;
102}
103EXPORT_SYMBOL(rdma_copy_addr);
104
105int rdma_translate_ip(struct sockaddr *addr, struct rdma_dev_addr *dev_addr)
 
106{
107	struct net_device *dev;
108	int ret = -EADDRNOTAVAIL;
109
110	if (dev_addr->bound_dev_if) {
111		dev = dev_get_by_index(&init_net, dev_addr->bound_dev_if);
112		if (!dev)
113			return -ENODEV;
114		ret = rdma_copy_addr(dev_addr, dev, NULL);
115		dev_put(dev);
116		return ret;
117	}
118
119	switch (addr->sa_family) {
120	case AF_INET:
121		dev = ip_dev_find(&init_net,
122			((struct sockaddr_in *) addr)->sin_addr.s_addr);
123
124		if (!dev)
125			return ret;
126
127		ret = rdma_copy_addr(dev_addr, dev, NULL);
128		dev_put(dev);
129		break;
130
131#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
132	case AF_INET6:
133		rcu_read_lock();
134		for_each_netdev_rcu(&init_net, dev) {
135			if (ipv6_chk_addr(&init_net,
136					  &((struct sockaddr_in6 *) addr)->sin6_addr,
137					  dev, 1)) {
138				ret = rdma_copy_addr(dev_addr, dev, NULL);
139				break;
140			}
141		}
142		rcu_read_unlock();
143		break;
144#endif
145	}
146	return ret;
147}
148EXPORT_SYMBOL(rdma_translate_ip);
149
150static void set_timeout(unsigned long time)
151{
152	unsigned long delay;
153
154	cancel_delayed_work(&work);
155
156	delay = time - jiffies;
157	if ((long)delay <= 0)
158		delay = 1;
159
160	queue_delayed_work(addr_wq, &work, delay);
161}
162
163static void queue_req(struct addr_req *req)
164{
165	struct addr_req *temp_req;
166
167	mutex_lock(&lock);
168	list_for_each_entry_reverse(temp_req, &req_list, list) {
169		if (time_after_eq(req->timeout, temp_req->timeout))
170			break;
171	}
172
173	list_add(&req->list, &temp_req->list);
174
175	if (req_list.next == &req->list)
176		set_timeout(req->timeout);
177	mutex_unlock(&lock);
178}
179
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
180static int addr4_resolve(struct sockaddr_in *src_in,
181			 struct sockaddr_in *dst_in,
182			 struct rdma_dev_addr *addr)
 
183{
184	__be32 src_ip = src_in->sin_addr.s_addr;
185	__be32 dst_ip = dst_in->sin_addr.s_addr;
186	struct rtable *rt;
187	struct neighbour *neigh;
188	struct flowi4 fl4;
189	int ret;
190
191	memset(&fl4, 0, sizeof(fl4));
192	fl4.daddr = dst_ip;
193	fl4.saddr = src_ip;
194	fl4.flowi4_oif = addr->bound_dev_if;
195	rt = ip_route_output_key(&init_net, &fl4);
196	if (IS_ERR(rt)) {
197		ret = PTR_ERR(rt);
198		goto out;
199	}
200	src_in->sin_family = AF_INET;
201	src_in->sin_addr.s_addr = fl4.saddr;
202
203	if (rt->dst.dev->flags & IFF_LOOPBACK) {
204		ret = rdma_translate_ip((struct sockaddr *) dst_in, addr);
205		if (!ret)
206			memcpy(addr->dst_dev_addr, addr->src_dev_addr, MAX_ADDR_LEN);
207		goto put;
208	}
209
210	/* If the device does ARP internally, return 'done' */
211	if (rt->dst.dev->flags & IFF_NOARP) {
212		ret = rdma_copy_addr(addr, rt->dst.dev, NULL);
213		goto put;
214	}
215
216	neigh = neigh_lookup(&arp_tbl, &rt->rt_gateway, rt->dst.dev);
217	if (!neigh || !(neigh->nud_state & NUD_VALID)) {
218		neigh_event_send(dst_get_neighbour(&rt->dst), NULL);
219		ret = -ENODATA;
220		if (neigh)
221			goto release;
222		goto put;
223	}
224
225	ret = rdma_copy_addr(addr, neigh->dev, neigh->ha);
226release:
227	neigh_release(neigh);
228put:
229	ip_rt_put(rt);
230out:
231	return ret;
232}
233
234#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
235static int addr6_resolve(struct sockaddr_in6 *src_in,
236			 struct sockaddr_in6 *dst_in,
237			 struct rdma_dev_addr *addr)
 
238{
239	struct flowi6 fl6;
240	struct neighbour *neigh;
241	struct dst_entry *dst;
 
242	int ret;
243
244	memset(&fl6, 0, sizeof fl6);
245	ipv6_addr_copy(&fl6.daddr, &dst_in->sin6_addr);
246	ipv6_addr_copy(&fl6.saddr, &src_in->sin6_addr);
247	fl6.flowi6_oif = addr->bound_dev_if;
248
249	dst = ip6_route_output(&init_net, NULL, &fl6);
250	if ((ret = dst->error))
251		goto put;
252
253	if (ipv6_addr_any(&fl6.saddr)) {
254		ret = ipv6_dev_get_saddr(&init_net, ip6_dst_idev(dst)->dev,
255					 &fl6.daddr, 0, &fl6.saddr);
256		if (ret)
257			goto put;
258
 
 
259		src_in->sin6_family = AF_INET6;
260		ipv6_addr_copy(&src_in->sin6_addr, &fl6.saddr);
261	}
262
263	if (dst->dev->flags & IFF_LOOPBACK) {
264		ret = rdma_translate_ip((struct sockaddr *) dst_in, addr);
265		if (!ret)
266			memcpy(addr->dst_dev_addr, addr->src_dev_addr, MAX_ADDR_LEN);
267		goto put;
268	}
269
270	/* If the device does ARP internally, return 'done' */
271	if (dst->dev->flags & IFF_NOARP) {
272		ret = rdma_copy_addr(addr, dst->dev, NULL);
273		goto put;
274	}
 
 
275
276	neigh = dst_get_neighbour(dst);
277	if (!neigh || !(neigh->nud_state & NUD_VALID)) {
278		if (neigh)
279			neigh_event_send(neigh, NULL);
280		ret = -ENODATA;
281		goto put;
282	}
283
284	ret = rdma_copy_addr(addr, dst->dev, neigh->ha);
285put:
286	dst_release(dst);
287	return ret;
288}
289#else
290static int addr6_resolve(struct sockaddr_in6 *src_in,
291			 struct sockaddr_in6 *dst_in,
292			 struct rdma_dev_addr *addr)
 
293{
294	return -EADDRNOTAVAIL;
295}
296#endif
297
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
298static int addr_resolve(struct sockaddr *src_in,
299			struct sockaddr *dst_in,
300			struct rdma_dev_addr *addr)
 
 
301{
 
 
 
 
 
 
 
 
 
302	if (src_in->sa_family == AF_INET) {
303		return addr4_resolve((struct sockaddr_in *) src_in,
304			(struct sockaddr_in *) dst_in, addr);
305	} else
306		return addr6_resolve((struct sockaddr_in6 *) src_in,
307			(struct sockaddr_in6 *) dst_in, addr);
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
308}
309
310static void process_req(struct work_struct *work)
311{
312	struct addr_req *req, *temp_req;
313	struct sockaddr *src_in, *dst_in;
314	struct list_head done_list;
315
316	INIT_LIST_HEAD(&done_list);
317
318	mutex_lock(&lock);
319	list_for_each_entry_safe(req, temp_req, &req_list, list) {
320		if (req->status == -ENODATA) {
321			src_in = (struct sockaddr *) &req->src_addr;
322			dst_in = (struct sockaddr *) &req->dst_addr;
323			req->status = addr_resolve(src_in, dst_in, req->addr);
 
324			if (req->status && time_after_eq(jiffies, req->timeout))
325				req->status = -ETIMEDOUT;
326			else if (req->status == -ENODATA)
 
327				continue;
 
328		}
329		list_move_tail(&req->list, &done_list);
330	}
331
332	if (!list_empty(&req_list)) {
333		req = list_entry(req_list.next, struct addr_req, list);
334		set_timeout(req->timeout);
335	}
336	mutex_unlock(&lock);
337
338	list_for_each_entry_safe(req, temp_req, &done_list, list) {
339		list_del(&req->list);
 
 
 
 
 
340		req->callback(req->status, (struct sockaddr *) &req->src_addr,
341			req->addr, req->context);
342		put_client(req->client);
343		kfree(req);
344	}
345}
346
347int rdma_resolve_ip(struct rdma_addr_client *client,
348		    struct sockaddr *src_addr, struct sockaddr *dst_addr,
349		    struct rdma_dev_addr *addr, int timeout_ms,
350		    void (*callback)(int status, struct sockaddr *src_addr,
351				     struct rdma_dev_addr *addr, void *context),
352		    void *context)
353{
354	struct sockaddr *src_in, *dst_in;
355	struct addr_req *req;
356	int ret = 0;
357
358	req = kzalloc(sizeof *req, GFP_KERNEL);
359	if (!req)
360		return -ENOMEM;
361
362	src_in = (struct sockaddr *) &req->src_addr;
363	dst_in = (struct sockaddr *) &req->dst_addr;
364
365	if (src_addr) {
366		if (src_addr->sa_family != dst_addr->sa_family) {
367			ret = -EINVAL;
368			goto err;
369		}
370
371		memcpy(src_in, src_addr, ip_addr_size(src_addr));
372	} else {
373		src_in->sa_family = dst_addr->sa_family;
374	}
375
376	memcpy(dst_in, dst_addr, ip_addr_size(dst_addr));
377	req->addr = addr;
378	req->callback = callback;
379	req->context = context;
380	req->client = client;
381	atomic_inc(&client->refcount);
 
 
382
383	req->status = addr_resolve(src_in, dst_in, addr);
384	switch (req->status) {
385	case 0:
386		req->timeout = jiffies;
387		queue_req(req);
388		break;
389	case -ENODATA:
390		req->timeout = msecs_to_jiffies(timeout_ms) + jiffies;
391		queue_req(req);
392		break;
393	default:
394		ret = req->status;
395		atomic_dec(&client->refcount);
396		goto err;
397	}
398	return ret;
399err:
400	kfree(req);
401	return ret;
402}
403EXPORT_SYMBOL(rdma_resolve_ip);
404
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
405void rdma_addr_cancel(struct rdma_dev_addr *addr)
406{
407	struct addr_req *req, *temp_req;
408
409	mutex_lock(&lock);
410	list_for_each_entry_safe(req, temp_req, &req_list, list) {
411		if (req->addr == addr) {
412			req->status = -ECANCELED;
413			req->timeout = jiffies;
414			list_move(&req->list, &req_list);
415			set_timeout(req->timeout);
416			break;
417		}
418	}
419	mutex_unlock(&lock);
420}
421EXPORT_SYMBOL(rdma_addr_cancel);
422
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
423static int netevent_callback(struct notifier_block *self, unsigned long event,
424	void *ctx)
425{
426	if (event == NETEVENT_NEIGH_UPDATE) {
427		struct neighbour *neigh = ctx;
428
429		if (neigh->nud_state & NUD_VALID) {
430			set_timeout(jiffies);
431		}
432	}
433	return 0;
434}
435
/* Notifier block registered in addr_init(); dispatches to netevent_callback(). */
436static struct notifier_block nb = {
437	.notifier_call = netevent_callback
438};
439
440static int __init addr_init(void)
441{
442	addr_wq = create_singlethread_workqueue("ib_addr");
443	if (!addr_wq)
444		return -ENOMEM;
445
446	register_netevent_notifier(&nb);
 
 
447	return 0;
448}
449
450static void __exit addr_cleanup(void)
451{
 
452	unregister_netevent_notifier(&nb);
453	destroy_workqueue(addr_wq);
454}
455
/* Hook addr_init()/addr_cleanup() up as the module's load and unload handlers. */
456module_init(addr_init);
457module_exit(addr_cleanup);
v4.17
  1/*
  2 * Copyright (c) 2005 Voltaire Inc.  All rights reserved.
  3 * Copyright (c) 2002-2005, Network Appliance, Inc. All rights reserved.
  4 * Copyright (c) 1999-2005, Mellanox Technologies, Inc. All rights reserved.
  5 * Copyright (c) 2005 Intel Corporation.  All rights reserved.
  6 *
  7 * This software is available to you under a choice of one of two
  8 * licenses.  You may choose to be licensed under the terms of the GNU
  9 * General Public License (GPL) Version 2, available from the file
 10 * COPYING in the main directory of this source tree, or the
 11 * OpenIB.org BSD license below:
 12 *
 13 *     Redistribution and use in source and binary forms, with or
 14 *     without modification, are permitted provided that the following
 15 *     conditions are met:
 16 *
 17 *      - Redistributions of source code must retain the above
 18 *        copyright notice, this list of conditions and the following
 19 *        disclaimer.
 20 *
 21 *      - Redistributions in binary form must reproduce the above
 22 *        copyright notice, this list of conditions and the following
 23 *        disclaimer in the documentation and/or other materials
 24 *        provided with the distribution.
 25 *
 26 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 27 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 28 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 29 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 30 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 31 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 32 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 33 * SOFTWARE.
 34 */
 35
 36#include <linux/mutex.h>
 37#include <linux/inetdevice.h>
 38#include <linux/slab.h>
 39#include <linux/workqueue.h>
 40#include <linux/module.h>
 41#include <net/arp.h>
 42#include <net/neighbour.h>
 43#include <net/route.h>
 44#include <net/netevent.h>
 45#include <net/addrconf.h>
 46#include <net/ip6_route.h>
 47#include <rdma/ib_addr.h>
 48#include <rdma/ib.h>
 49#include <rdma/rdma_netlink.h>
 50#include <net/netlink.h>
 51
 52#include "core_priv.h"
 
 
 53
/*
 * One queued address-resolution request.  Linked on req_list in timeout
 * order by queue_req(); completed and freed by process_one_req() or
 * process_req() after the callback has been invoked.
 */
 54struct addr_req {
 55	struct list_head list;			/* link on req_list, or on process_req()'s done list */
 56	struct sockaddr_storage src_addr;	/* private copy of the source address */
 57	struct sockaddr_storage dst_addr;	/* private copy of the destination address */
 58	struct rdma_dev_addr *addr;		/* caller's result buffer */
 59	struct rdma_addr_client *client;	/* client holding a reference for this request */
 60	void *context;				/* opaque value handed back to callback */
 61	void (*callback)(int status, struct sockaddr *src_addr,
 62			 struct rdma_dev_addr *addr, void *context);
 63	unsigned long timeout;			/* deadline, in jiffies */
 64	struct delayed_work work;		/* per-request work, handled by process_one_req() */
 65	int status;				/* -ENODATA while unresolved, final result otherwise */
 66	u32 seq;				/* matched against nlmsg_seq of netlink responses */
 67};
 68
/* Source of addr_req.seq values; incremented once per rdma_resolve_ip() call. */
 69static atomic_t ib_nl_addr_request_seq = ATOMIC_INIT(0);
 70
/* Forward declaration: needed by DECLARE_DELAYED_WORK() below. */
 71static void process_req(struct work_struct *work);
 72
 73static DEFINE_MUTEX(lock);			/* taken around req_list accesses in this file */
 74static LIST_HEAD(req_list);			/* pending requests, kept sorted by timeout in queue_req() */
 75static DECLARE_DELAYED_WORK(work, process_req);	/* global work item bound to process_req() */
 76static struct workqueue_struct *addr_wq;	/* queue that set_timeout() schedules work on */
 77
/*
 * Attribute policy handed to nla_parse() in ib_nl_is_good_ip_resp(): the
 * DGID attribute is binary, with .len set to the size of
 * struct rdma_nla_ls_gid.
 */
 78static const struct nla_policy ib_nl_addr_policy[LS_NLA_TYPE_MAX] = {
 79	[LS_NLA_TYPE_DGID] = {.type = NLA_BINARY,
 80		.len = sizeof(struct rdma_nla_ls_gid)},
 81};
 82
 83static inline bool ib_nl_is_good_ip_resp(const struct nlmsghdr *nlh)
 84{
 85	struct nlattr *tb[LS_NLA_TYPE_MAX] = {};
 86	int ret;
 87
 88	if (nlh->nlmsg_flags & RDMA_NL_LS_F_ERR)
 89		return false;
 90
 91	ret = nla_parse(tb, LS_NLA_TYPE_MAX - 1, nlmsg_data(nlh),
 92			nlmsg_len(nlh), ib_nl_addr_policy, NULL);
 93	if (ret)
 94		return false;
 95
 96	return true;
 97}
 98
 99static void ib_nl_process_good_ip_rsep(const struct nlmsghdr *nlh)
100{
101	const struct nlattr *head, *curr;
102	union ib_gid gid;
103	struct addr_req *req;
104	int len, rem;
105	int found = 0;
106
107	head = (const struct nlattr *)nlmsg_data(nlh);
108	len = nlmsg_len(nlh);
109
110	nla_for_each_attr(curr, head, len, rem) {
111		if (curr->nla_type == LS_NLA_TYPE_DGID)
112			memcpy(&gid, nla_data(curr), nla_len(curr));
113	}
114
115	mutex_lock(&lock);
116	list_for_each_entry(req, &req_list, list) {
117		if (nlh->nlmsg_seq != req->seq)
118			continue;
119		/* We set the DGID part, the rest was set earlier */
120		rdma_addr_set_dgid(req->addr, &gid);
121		req->status = 0;
122		found = 1;
123		break;
124	}
125	mutex_unlock(&lock);
126
127	if (!found)
128		pr_info("Couldn't find request waiting for DGID: %pI6\n",
129			&gid);
130}
131
132int ib_nl_handle_ip_res_resp(struct sk_buff *skb,
133			     struct nlmsghdr *nlh,
134			     struct netlink_ext_ack *extack)
135{
136	if ((nlh->nlmsg_flags & NLM_F_REQUEST) ||
137	    !(NETLINK_CB(skb).sk))
138		return -EPERM;
139
140	if (ib_nl_is_good_ip_resp(nlh))
141		ib_nl_process_good_ip_rsep(nlh);
142
143	return skb->len;
144}
145
146static int ib_nl_ip_send_msg(struct rdma_dev_addr *dev_addr,
147			     const void *daddr,
148			     u32 seq, u16 family)
149{
150	struct sk_buff *skb = NULL;
151	struct nlmsghdr *nlh;
152	struct rdma_ls_ip_resolve_header *header;
153	void *data;
154	size_t size;
155	int attrtype;
156	int len;
157
158	if (family == AF_INET) {
159		size = sizeof(struct in_addr);
160		attrtype = RDMA_NLA_F_MANDATORY | LS_NLA_TYPE_IPV4;
161	} else {
162		size = sizeof(struct in6_addr);
163		attrtype = RDMA_NLA_F_MANDATORY | LS_NLA_TYPE_IPV6;
164	}
165
166	len = nla_total_size(sizeof(size));
167	len += NLMSG_ALIGN(sizeof(*header));
168
169	skb = nlmsg_new(len, GFP_KERNEL);
170	if (!skb)
171		return -ENOMEM;
172
173	data = ibnl_put_msg(skb, &nlh, seq, 0, RDMA_NL_LS,
174			    RDMA_NL_LS_OP_IP_RESOLVE, NLM_F_REQUEST);
175	if (!data) {
176		nlmsg_free(skb);
177		return -ENODATA;
178	}
179
180	/* Construct the family header first */
181	header = skb_put(skb, NLMSG_ALIGN(sizeof(*header)));
182	header->ifindex = dev_addr->bound_dev_if;
183	nla_put(skb, attrtype, size, daddr);
184
185	/* Repair the nlmsg header length */
186	nlmsg_end(skb, nlh);
187	rdma_nl_multicast(skb, RDMA_NL_GROUP_LS, GFP_KERNEL);
188
189	/* Make the request retry, so when we get the response from userspace
190	 * we will have something.
191	 */
192	return -ENODATA;
193}
194
195int rdma_addr_size(struct sockaddr *addr)
196{
197	switch (addr->sa_family) {
198	case AF_INET:
199		return sizeof(struct sockaddr_in);
200	case AF_INET6:
201		return sizeof(struct sockaddr_in6);
202	case AF_IB:
203		return sizeof(struct sockaddr_ib);
204	default:
205		return 0;
206	}
207}
208EXPORT_SYMBOL(rdma_addr_size);
209
210int rdma_addr_size_in6(struct sockaddr_in6 *addr)
211{
212	int ret = rdma_addr_size((struct sockaddr *) addr);
213
214	return ret <= sizeof(*addr) ? ret : 0;
215}
216EXPORT_SYMBOL(rdma_addr_size_in6);
217
218int rdma_addr_size_kss(struct __kernel_sockaddr_storage *addr)
219{
220	int ret = rdma_addr_size((struct sockaddr *) addr);
221
222	return ret <= sizeof(*addr) ? ret : 0;
223}
224EXPORT_SYMBOL(rdma_addr_size_kss);
225
/* File-local client instance. NOTE(review): presumably used for in-module resolutions - confirm against its users. */
226static struct rdma_addr_client self;
227
228void rdma_addr_register_client(struct rdma_addr_client *client)
229{
230	atomic_set(&client->refcount, 1);
231	init_completion(&client->comp);
232}
233EXPORT_SYMBOL(rdma_addr_register_client);
234
235static inline void put_client(struct rdma_addr_client *client)
236{
237	if (atomic_dec_and_test(&client->refcount))
238		complete(&client->comp);
239}
240
241void rdma_addr_unregister_client(struct rdma_addr_client *client)
242{
243	put_client(client);
244	wait_for_completion(&client->comp);
245}
246EXPORT_SYMBOL(rdma_addr_unregister_client);
247
248void rdma_copy_addr(struct rdma_dev_addr *dev_addr,
249		    const struct net_device *dev,
250		    const unsigned char *dst_dev_addr)
251{
252	dev_addr->dev_type = dev->type;
253	memcpy(dev_addr->src_dev_addr, dev->dev_addr, MAX_ADDR_LEN);
254	memcpy(dev_addr->broadcast, dev->broadcast, MAX_ADDR_LEN);
255	if (dst_dev_addr)
256		memcpy(dev_addr->dst_dev_addr, dst_dev_addr, MAX_ADDR_LEN);
257	dev_addr->bound_dev_if = dev->ifindex;
 
258}
259EXPORT_SYMBOL(rdma_copy_addr);
260
261int rdma_translate_ip(const struct sockaddr *addr,
262		      struct rdma_dev_addr *dev_addr)
263{
264	struct net_device *dev;
 
265
266	if (dev_addr->bound_dev_if) {
267		dev = dev_get_by_index(dev_addr->net, dev_addr->bound_dev_if);
268		if (!dev)
269			return -ENODEV;
270		rdma_copy_addr(dev_addr, dev, NULL);
271		dev_put(dev);
272		return 0;
273	}
274
275	switch (addr->sa_family) {
276	case AF_INET:
277		dev = ip_dev_find(dev_addr->net,
278			((const struct sockaddr_in *)addr)->sin_addr.s_addr);
279
280		if (!dev)
281			return -EADDRNOTAVAIL;
282
283		rdma_copy_addr(dev_addr, dev, NULL);
284		dev_put(dev);
285		break;
286#if IS_ENABLED(CONFIG_IPV6)
 
287	case AF_INET6:
288		rcu_read_lock();
289		for_each_netdev_rcu(dev_addr->net, dev) {
290			if (ipv6_chk_addr(dev_addr->net,
291					  &((const struct sockaddr_in6 *)addr)->sin6_addr,
292					  dev, 1)) {
293				rdma_copy_addr(dev_addr, dev, NULL);
294				break;
295			}
296		}
297		rcu_read_unlock();
298		break;
299#endif
300	}
301	return 0;
302}
303EXPORT_SYMBOL(rdma_translate_ip);
304
305static void set_timeout(struct delayed_work *delayed_work, unsigned long time)
306{
307	unsigned long delay;
308
 
 
309	delay = time - jiffies;
310	if ((long)delay < 0)
311		delay = 0;
312
313	mod_delayed_work(addr_wq, delayed_work, delay);
314}
315
316static void queue_req(struct addr_req *req)
317{
318	struct addr_req *temp_req;
319
320	mutex_lock(&lock);
321	list_for_each_entry_reverse(temp_req, &req_list, list) {
322		if (time_after_eq(req->timeout, temp_req->timeout))
323			break;
324	}
325
326	list_add(&req->list, &temp_req->list);
327
328	set_timeout(&req->work, req->timeout);
 
329	mutex_unlock(&lock);
330}
331
332static int ib_nl_fetch_ha(const struct dst_entry *dst,
333			  struct rdma_dev_addr *dev_addr,
334			  const void *daddr, u32 seq, u16 family)
335{
336	if (rdma_nl_chk_listeners(RDMA_NL_GROUP_LS))
337		return -EADDRNOTAVAIL;
338
339	/* We fill in what we can, the response will fill the rest */
340	rdma_copy_addr(dev_addr, dst->dev, NULL);
341	return ib_nl_ip_send_msg(dev_addr, daddr, seq, family);
342}
343
344static int dst_fetch_ha(const struct dst_entry *dst,
345			struct rdma_dev_addr *dev_addr,
346			const void *daddr)
347{
348	struct neighbour *n;
349	int ret = 0;
350
351	n = dst_neigh_lookup(dst, daddr);
352
353	rcu_read_lock();
354	if (!n || !(n->nud_state & NUD_VALID)) {
355		if (n)
356			neigh_event_send(n, NULL);
357		ret = -ENODATA;
358	} else {
359		rdma_copy_addr(dev_addr, dst->dev, n->ha);
360	}
361	rcu_read_unlock();
362
363	if (n)
364		neigh_release(n);
365
366	return ret;
367}
368
369static bool has_gateway(const struct dst_entry *dst, sa_family_t family)
370{
371	struct rtable *rt;
372	struct rt6_info *rt6;
373
374	if (family == AF_INET) {
375		rt = container_of(dst, struct rtable, dst);
376		return rt->rt_uses_gateway;
377	}
378
379	rt6 = container_of(dst, struct rt6_info, dst);
380	return rt6->rt6i_flags & RTF_GATEWAY;
381}
382
383static int fetch_ha(const struct dst_entry *dst, struct rdma_dev_addr *dev_addr,
384		    const struct sockaddr *dst_in, u32 seq)
385{
386	const struct sockaddr_in *dst_in4 =
387		(const struct sockaddr_in *)dst_in;
388	const struct sockaddr_in6 *dst_in6 =
389		(const struct sockaddr_in6 *)dst_in;
390	const void *daddr = (dst_in->sa_family == AF_INET) ?
391		(const void *)&dst_in4->sin_addr.s_addr :
392		(const void *)&dst_in6->sin6_addr;
393	sa_family_t family = dst_in->sa_family;
394
395	/* Gateway + ARPHRD_INFINIBAND -> IB router */
396	if (has_gateway(dst, family) && dst->dev->type == ARPHRD_INFINIBAND)
397		return ib_nl_fetch_ha(dst, dev_addr, daddr, seq, family);
398	else
399		return dst_fetch_ha(dst, dev_addr, daddr);
400}
401
402static int addr4_resolve(struct sockaddr_in *src_in,
403			 const struct sockaddr_in *dst_in,
404			 struct rdma_dev_addr *addr,
405			 struct rtable **prt)
406{
407	__be32 src_ip = src_in->sin_addr.s_addr;
408	__be32 dst_ip = dst_in->sin_addr.s_addr;
409	struct rtable *rt;
 
410	struct flowi4 fl4;
411	int ret;
412
413	memset(&fl4, 0, sizeof(fl4));
414	fl4.daddr = dst_ip;
415	fl4.saddr = src_ip;
416	fl4.flowi4_oif = addr->bound_dev_if;
417	rt = ip_route_output_key(addr->net, &fl4);
418	ret = PTR_ERR_OR_ZERO(rt);
419	if (ret)
420		return ret;
421
422	src_in->sin_family = AF_INET;
423	src_in->sin_addr.s_addr = fl4.saddr;
424
425	/* If there's a gateway and type of device not ARPHRD_INFINIBAND, we're
426	 * definitely in RoCE v2 (as RoCE v1 isn't routable) set the network
427	 * type accordingly.
428	 */
429	if (rt->rt_uses_gateway && rt->dst.dev->type != ARPHRD_INFINIBAND)
430		addr->network = RDMA_NETWORK_IPV4;
 
 
 
 
 
 
431
432	addr->hoplimit = ip4_dst_hoplimit(&rt->dst);
 
 
 
 
 
 
 
433
434	*prt = rt;
435	return 0;
 
 
 
 
 
436}
437
438#if IS_ENABLED(CONFIG_IPV6)
439static int addr6_resolve(struct sockaddr_in6 *src_in,
440			 const struct sockaddr_in6 *dst_in,
441			 struct rdma_dev_addr *addr,
442			 struct dst_entry **pdst)
443{
444	struct flowi6 fl6;
 
445	struct dst_entry *dst;
446	struct rt6_info *rt;
447	int ret;
448
449	memset(&fl6, 0, sizeof fl6);
450	fl6.daddr = dst_in->sin6_addr;
451	fl6.saddr = src_in->sin6_addr;
452	fl6.flowi6_oif = addr->bound_dev_if;
453
454	ret = ipv6_stub->ipv6_dst_lookup(addr->net, NULL, &dst, &fl6);
455	if (ret < 0)
456		return ret;
 
 
 
 
 
 
457
458	rt = (struct rt6_info *)dst;
459	if (ipv6_addr_any(&src_in->sin6_addr)) {
460		src_in->sin6_family = AF_INET6;
461		src_in->sin6_addr = fl6.saddr;
 
 
 
 
 
 
 
462	}
463
464	/* If there's a gateway and type of device not ARPHRD_INFINIBAND, we're
465	 * definitely in RoCE v2 (as RoCE v1 isn't routable) set the network
466	 * type accordingly.
467	 */
468	if (rt->rt6i_flags & RTF_GATEWAY &&
469	    ip6_dst_idev(dst)->dev->type != ARPHRD_INFINIBAND)
470		addr->network = RDMA_NETWORK_IPV6;
471
472	addr->hoplimit = ip6_dst_hoplimit(dst);
 
 
 
 
 
 
473
474	*pdst = dst;
475	return 0;
 
 
476}
477#else
478static int addr6_resolve(struct sockaddr_in6 *src_in,
479			 const struct sockaddr_in6 *dst_in,
480			 struct rdma_dev_addr *addr,
481			 struct dst_entry **pdst)
482{
483	return -EADDRNOTAVAIL;
484}
485#endif
486
487static int addr_resolve_neigh(const struct dst_entry *dst,
488			      const struct sockaddr *dst_in,
489			      struct rdma_dev_addr *addr,
490			      u32 seq)
491{
492	if (dst->dev->flags & IFF_LOOPBACK) {
493		int ret;
494
495		ret = rdma_translate_ip(dst_in, addr);
496		if (!ret)
497			memcpy(addr->dst_dev_addr, addr->src_dev_addr,
498			       MAX_ADDR_LEN);
499
500		return ret;
501	}
502
503	/* If the device doesn't do ARP internally */
504	if (!(dst->dev->flags & IFF_NOARP))
505		return fetch_ha(dst, addr, dst_in, seq);
506
507	rdma_copy_addr(addr, dst->dev, NULL);
508
509	return 0;
510}
511
512static int addr_resolve(struct sockaddr *src_in,
513			const struct sockaddr *dst_in,
514			struct rdma_dev_addr *addr,
515			bool resolve_neigh,
516			u32 seq)
517{
518	struct net_device *ndev;
519	struct dst_entry *dst;
520	int ret;
521
522	if (!addr->net) {
523		pr_warn_ratelimited("%s: missing namespace\n", __func__);
524		return -EINVAL;
525	}
526
527	if (src_in->sa_family == AF_INET) {
528		struct rtable *rt = NULL;
529		const struct sockaddr_in *dst_in4 =
530			(const struct sockaddr_in *)dst_in;
531
532		ret = addr4_resolve((struct sockaddr_in *)src_in,
533				    dst_in4, addr, &rt);
534		if (ret)
535			return ret;
536
537		if (resolve_neigh)
538			ret = addr_resolve_neigh(&rt->dst, dst_in, addr, seq);
539
540		if (addr->bound_dev_if) {
541			ndev = dev_get_by_index(addr->net, addr->bound_dev_if);
542		} else {
543			ndev = rt->dst.dev;
544			dev_hold(ndev);
545		}
546
547		ip_rt_put(rt);
548	} else {
549		const struct sockaddr_in6 *dst_in6 =
550			(const struct sockaddr_in6 *)dst_in;
551
552		ret = addr6_resolve((struct sockaddr_in6 *)src_in,
553				    dst_in6, addr,
554				    &dst);
555		if (ret)
556			return ret;
557
558		if (resolve_neigh)
559			ret = addr_resolve_neigh(dst, dst_in, addr, seq);
560
561		if (addr->bound_dev_if) {
562			ndev = dev_get_by_index(addr->net, addr->bound_dev_if);
563		} else {
564			ndev = dst->dev;
565			dev_hold(ndev);
566		}
567
568		dst_release(dst);
569	}
570
571	if (ndev) {
572		if (ndev->flags & IFF_LOOPBACK)
573			ret = rdma_translate_ip(dst_in, addr);
574		else
575			addr->bound_dev_if = ndev->ifindex;
576		dev_put(ndev);
577	}
578
579	return ret;
580}
581
582static void process_one_req(struct work_struct *_work)
583{
584	struct addr_req *req;
585	struct sockaddr *src_in, *dst_in;
586
587	mutex_lock(&lock);
588	req = container_of(_work, struct addr_req, work.work);
589
590	if (req->status == -ENODATA) {
591		src_in = (struct sockaddr *)&req->src_addr;
592		dst_in = (struct sockaddr *)&req->dst_addr;
593		req->status = addr_resolve(src_in, dst_in, req->addr,
594					   true, req->seq);
595		if (req->status && time_after_eq(jiffies, req->timeout)) {
596			req->status = -ETIMEDOUT;
597		} else if (req->status == -ENODATA) {
598			/* requeue the work for retrying again */
599			set_timeout(&req->work, req->timeout);
600			mutex_unlock(&lock);
601			return;
602		}
603	}
604	list_del(&req->list);
605	mutex_unlock(&lock);
606
607	/*
608	 * Although the work will normally have been canceled by the
609	 * workqueue, it can still be requeued as long as it is on the
610	 * req_list, so it could have been requeued before we grabbed &lock.
611	 * We need to cancel it after it is removed from req_list to really be
612	 * sure it is safe to free.
613	 */
614	cancel_delayed_work(&req->work);
615
616	req->callback(req->status, (struct sockaddr *)&req->src_addr,
617		req->addr, req->context);
618	put_client(req->client);
619	kfree(req);
620}
621
622static void process_req(struct work_struct *work)
623{
624	struct addr_req *req, *temp_req;
625	struct sockaddr *src_in, *dst_in;
626	struct list_head done_list;
627
628	INIT_LIST_HEAD(&done_list);
629
630	mutex_lock(&lock);
631	list_for_each_entry_safe(req, temp_req, &req_list, list) {
632		if (req->status == -ENODATA) {
633			src_in = (struct sockaddr *) &req->src_addr;
634			dst_in = (struct sockaddr *) &req->dst_addr;
635			req->status = addr_resolve(src_in, dst_in, req->addr,
636						   true, req->seq);
637			if (req->status && time_after_eq(jiffies, req->timeout))
638				req->status = -ETIMEDOUT;
639			else if (req->status == -ENODATA) {
640				set_timeout(&req->work, req->timeout);
641				continue;
642			}
643		}
644		list_move_tail(&req->list, &done_list);
645	}
646
 
 
 
 
647	mutex_unlock(&lock);
648
649	list_for_each_entry_safe(req, temp_req, &done_list, list) {
650		list_del(&req->list);
651		/* It is safe to cancel other work items from this work item
652		 * because at a time there can be only one work item running
653		 * with this single threaded work queue.
654		 */
655		cancel_delayed_work(&req->work);
656		req->callback(req->status, (struct sockaddr *) &req->src_addr,
657			req->addr, req->context);
658		put_client(req->client);
659		kfree(req);
660	}
661}
662
663int rdma_resolve_ip(struct rdma_addr_client *client,
664		    struct sockaddr *src_addr, struct sockaddr *dst_addr,
665		    struct rdma_dev_addr *addr, int timeout_ms,
666		    void (*callback)(int status, struct sockaddr *src_addr,
667				     struct rdma_dev_addr *addr, void *context),
668		    void *context)
669{
670	struct sockaddr *src_in, *dst_in;
671	struct addr_req *req;
672	int ret = 0;
673
674	req = kzalloc(sizeof *req, GFP_KERNEL);
675	if (!req)
676		return -ENOMEM;
677
678	src_in = (struct sockaddr *) &req->src_addr;
679	dst_in = (struct sockaddr *) &req->dst_addr;
680
681	if (src_addr) {
682		if (src_addr->sa_family != dst_addr->sa_family) {
683			ret = -EINVAL;
684			goto err;
685		}
686
687		memcpy(src_in, src_addr, rdma_addr_size(src_addr));
688	} else {
689		src_in->sa_family = dst_addr->sa_family;
690	}
691
692	memcpy(dst_in, dst_addr, rdma_addr_size(dst_addr));
693	req->addr = addr;
694	req->callback = callback;
695	req->context = context;
696	req->client = client;
697	atomic_inc(&client->refcount);
698	INIT_DELAYED_WORK(&req->work, process_one_req);
699	req->seq = (u32)atomic_inc_return(&ib_nl_addr_request_seq);
700
701	req->status = addr_resolve(src_in, dst_in, addr, true, req->seq);
702	switch (req->status) {
703	case 0:
704		req->timeout = jiffies;
705		queue_req(req);
706		break;
707	case -ENODATA:
708		req->timeout = msecs_to_jiffies(timeout_ms) + jiffies;
709		queue_req(req);
710		break;
711	default:
712		ret = req->status;
713		atomic_dec(&client->refcount);
714		goto err;
715	}
716	return ret;
717err:
718	kfree(req);
719	return ret;
720}
721EXPORT_SYMBOL(rdma_resolve_ip);
722
723int rdma_resolve_ip_route(struct sockaddr *src_addr,
724			  const struct sockaddr *dst_addr,
725			  struct rdma_dev_addr *addr)
726{
727	struct sockaddr_storage ssrc_addr = {};
728	struct sockaddr *src_in = (struct sockaddr *)&ssrc_addr;
729
730	if (src_addr) {
731		if (src_addr->sa_family != dst_addr->sa_family)
732			return -EINVAL;
733
734		memcpy(src_in, src_addr, rdma_addr_size(src_addr));
735	} else {
736		src_in->sa_family = dst_addr->sa_family;
737	}
738
739	return addr_resolve(src_in, dst_addr, addr, false, 0);
740}
741
742void rdma_addr_cancel(struct rdma_dev_addr *addr)
743{
744	struct addr_req *req, *temp_req;
745
746	mutex_lock(&lock);
747	list_for_each_entry_safe(req, temp_req, &req_list, list) {
748		if (req->addr == addr) {
749			req->status = -ECANCELED;
750			req->timeout = jiffies;
751			list_move(&req->list, &req_list);
752			set_timeout(&req->work, req->timeout);
753			break;
754		}
755	}
756	mutex_unlock(&lock);
757}
758EXPORT_SYMBOL(rdma_addr_cancel);
759
760struct resolve_cb_context {
761	struct completion comp;
762	int status;
763};
764
765static void resolve_cb(int status, struct sockaddr *src_addr,
766	     struct rdma_dev_addr *addr, void *context)
767{
768	((struct resolve_cb_context *)context)->status = status;
769	complete(&((struct resolve_cb_context *)context)->comp);
770}
771
772int rdma_addr_find_l2_eth_by_grh(const union ib_gid *sgid,
773				 const union ib_gid *dgid,
774				 u8 *dmac, const struct net_device *ndev,
775				 int *hoplimit)
776{
777	struct rdma_dev_addr dev_addr;
778	struct resolve_cb_context ctx;
779	union {
780		struct sockaddr     _sockaddr;
781		struct sockaddr_in  _sockaddr_in;
782		struct sockaddr_in6 _sockaddr_in6;
783	} sgid_addr, dgid_addr;
784	int ret;
785
786	rdma_gid2ip(&sgid_addr._sockaddr, sgid);
787	rdma_gid2ip(&dgid_addr._sockaddr, dgid);
788
789	memset(&dev_addr, 0, sizeof(dev_addr));
790	dev_addr.bound_dev_if = ndev->ifindex;
791	dev_addr.net = &init_net;
792
793	init_completion(&ctx.comp);
794	ret = rdma_resolve_ip(&self, &sgid_addr._sockaddr, &dgid_addr._sockaddr,
795			&dev_addr, 1000, resolve_cb, &ctx);
796	if (ret)
797		return ret;
798
799	wait_for_completion(&ctx.comp);
800
801	ret = ctx.status;
802	if (ret)
803		return ret;
804
805	memcpy(dmac, dev_addr.dst_dev_addr, ETH_ALEN);
806	*hoplimit = dev_addr.hoplimit;
807	return 0;
808}
809
810static int netevent_callback(struct notifier_block *self, unsigned long event,
811	void *ctx)
812{
813	if (event == NETEVENT_NEIGH_UPDATE) {
814		struct neighbour *neigh = ctx;
815
816		if (neigh->nud_state & NUD_VALID)
817			set_timeout(&work, jiffies);
 
818	}
819	return 0;
820}
821
822static struct notifier_block nb = {
823	.notifier_call = netevent_callback
824};
825
826int addr_init(void)
827{
828	addr_wq = alloc_ordered_workqueue("ib_addr", 0);
829	if (!addr_wq)
830		return -ENOMEM;
831
832	register_netevent_notifier(&nb);
833	rdma_addr_register_client(&self);
834
835	return 0;
836}
837
838void addr_cleanup(void)
839{
840	rdma_addr_unregister_client(&self);
841	unregister_netevent_notifier(&nb);
842	destroy_workqueue(addr_wq);
843}