Line data Source code
1 : /* Kernel communication using netlink interface.
2 : * Copyright (C) 1999 Kunihiro Ishiguro
3 : *
4 : * This file is part of GNU Zebra.
5 : *
6 : * GNU Zebra is free software; you can redistribute it and/or modify it
7 : * under the terms of the GNU General Public License as published by the
8 : * Free Software Foundation; either version 2, or (at your option) any
9 : * later version.
10 : *
11 : * GNU Zebra is distributed in the hope that it will be useful, but
12 : * WITHOUT ANY WARRANTY; without even the implied warranty of
13 : * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 : * General Public License for more details.
15 : *
16 : * You should have received a copy of the GNU General Public License along
17 : * with this program; see the file COPYING; if not, write to the Free Software
18 : * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
19 : */
20 :
21 : #include <zebra.h>
22 :
23 : #ifdef HAVE_NETLINK
24 :
25 : #include "linklist.h"
26 : #include "if.h"
27 : #include "log.h"
28 : #include "prefix.h"
29 : #include "connected.h"
30 : #include "table.h"
31 : #include "memory.h"
32 : #include "rib.h"
33 : #include "thread.h"
34 : #include "privs.h"
35 : #include "nexthop.h"
36 : #include "vrf.h"
37 : #include "mpls.h"
38 : #include "lib_errors.h"
39 : #include "hash.h"
40 :
41 : #include "zebra/zebra_router.h"
42 : #include "zebra/zebra_ns.h"
43 : #include "zebra/zebra_vrf.h"
44 : #include "zebra/rt.h"
45 : #include "zebra/debug.h"
46 : #include "zebra/kernel_netlink.h"
47 : #include "zebra/rt_netlink.h"
48 : #include "zebra/if_netlink.h"
49 : #include "zebra/rule_netlink.h"
50 : #include "zebra/tc_netlink.h"
51 : #include "zebra/netconf_netlink.h"
52 : #include "zebra/zebra_errors.h"
53 :
/*
 * Fallback definitions for symbols that the system headers in use may not
 * provide. Each one is only defined when the headers did not define it.
 */

/* Socket option used to force the receive buffer size. */
#ifndef SO_RCVBUFFORCE
#define SO_RCVBUFFORCE (33)
#endif

/* Hack for GNU libc version 2. */
#ifndef MSG_TRUNC
#define MSG_TRUNC 0x20
#endif /* MSG_TRUNC */

/* First (aligned) byte after the current end of netlink message 'nmsg'. */
#ifndef NLMSG_TAIL
#define NLMSG_TAIL(nmsg)                                                       \
	((struct rtattr *)(((uint8_t *)(nmsg))                                 \
			   + NLMSG_ALIGN((nmsg)->nlmsg_len)))
#endif

/* First (aligned) byte after the current end of attribute 'rta'. */
#ifndef RTA_TAIL
#define RTA_TAIL(rta)                                                          \
	((struct rtattr *)(((uint8_t *)(rta)) + RTA_ALIGN((rta)->rta_len)))
#endif

/* Address family value used for IPv6 multicast routing messages. */
#ifndef RTNL_FAMILY_IP6MR
#define RTNL_FAMILY_IP6MR 129
#endif

/* Route protocol value shown as "mroute" in rtproto_str[]. */
#ifndef RTPROT_MROUTED
#define RTPROT_MROUTED 17
#endif
81 :
/* Default size of the buffer used for batching outgoing netlink messages. */
#define NL_DEFAULT_BATCH_BUFSIZE (16 * NL_PKT_BUF_SIZE)

/*
 * Default send threshold for a batch.
 *
 * The threshold is deliberately smaller than the buffer itself: the last
 * message that would not fit the batch would otherwise cross the upper
 * boundary and have to be encoded a second time into a fresh buffer. As long
 * as the gap between the threshold and the buffer size is larger than the
 * largest netlink message, that situation cannot occur.
 */
#define NL_DEFAULT_BATCH_SEND_THRESHOLD (15 * NL_PKT_BUF_SIZE)
93 :
94 : static const struct message nlmsg_str[] = {{RTM_NEWROUTE, "RTM_NEWROUTE"},
95 : {RTM_DELROUTE, "RTM_DELROUTE"},
96 : {RTM_GETROUTE, "RTM_GETROUTE"},
97 : {RTM_NEWLINK, "RTM_NEWLINK"},
98 : {RTM_SETLINK, "RTM_SETLINK"},
99 : {RTM_DELLINK, "RTM_DELLINK"},
100 : {RTM_GETLINK, "RTM_GETLINK"},
101 : {RTM_NEWADDR, "RTM_NEWADDR"},
102 : {RTM_DELADDR, "RTM_DELADDR"},
103 : {RTM_GETADDR, "RTM_GETADDR"},
104 : {RTM_NEWNEIGH, "RTM_NEWNEIGH"},
105 : {RTM_DELNEIGH, "RTM_DELNEIGH"},
106 : {RTM_GETNEIGH, "RTM_GETNEIGH"},
107 : {RTM_NEWRULE, "RTM_NEWRULE"},
108 : {RTM_DELRULE, "RTM_DELRULE"},
109 : {RTM_GETRULE, "RTM_GETRULE"},
110 : {RTM_NEWNEXTHOP, "RTM_NEWNEXTHOP"},
111 : {RTM_DELNEXTHOP, "RTM_DELNEXTHOP"},
112 : {RTM_GETNEXTHOP, "RTM_GETNEXTHOP"},
113 : {RTM_NEWNETCONF, "RTM_NEWNETCONF"},
114 : {RTM_DELNETCONF, "RTM_DELNETCONF"},
115 : {RTM_NEWTUNNEL, "RTM_NEWTUNNEL"},
116 : {RTM_DELTUNNEL, "RTM_DELTUNNEL"},
117 : {RTM_GETTUNNEL, "RTM_GETTUNNEL"},
118 : {RTM_NEWQDISC, "RTM_NEWQDISC"},
119 : {RTM_DELQDISC, "RTM_DELQDISC"},
120 : {RTM_GETQDISC, "RTM_GETQDISC"},
121 : {RTM_NEWTCLASS, "RTM_NEWTCLASS"},
122 : {RTM_DELTCLASS, "RTM_DELTCLASS"},
123 : {RTM_GETTCLASS, "RTM_GETTCLASS"},
124 : {RTM_NEWTFILTER, "RTM_NEWTFILTER"},
125 : {RTM_DELTFILTER, "RTM_DELTFILTER"},
126 : {RTM_GETTFILTER, "RTM_GETTFILTER"},
127 : {0}};
128 :
129 : static const struct message rtproto_str[] = {
130 : {RTPROT_REDIRECT, "redirect"},
131 : {RTPROT_KERNEL, "kernel"},
132 : {RTPROT_BOOT, "boot"},
133 : {RTPROT_STATIC, "static"},
134 : {RTPROT_GATED, "GateD"},
135 : {RTPROT_RA, "router advertisement"},
136 : {RTPROT_MRT, "MRT"},
137 : {RTPROT_ZEBRA, "Zebra"},
138 : #ifdef RTPROT_BIRD
139 : {RTPROT_BIRD, "BIRD"},
140 : #endif /* RTPROT_BIRD */
141 : {RTPROT_MROUTED, "mroute"},
142 : {RTPROT_BGP, "BGP"},
143 : {RTPROT_OSPF, "OSPF"},
144 : {RTPROT_ISIS, "IS-IS"},
145 : {RTPROT_RIP, "RIP"},
146 : {RTPROT_RIPNG, "RIPNG"},
147 : {RTPROT_ZSTATIC, "static"},
148 : {0}};
149 :
150 : static const struct message family_str[] = {{AF_INET, "ipv4"},
151 : {AF_INET6, "ipv6"},
152 : {AF_BRIDGE, "bridge"},
153 : {RTNL_FAMILY_IPMR, "ipv4MR"},
154 : {RTNL_FAMILY_IP6MR, "ipv6MR"},
155 : {0}};
156 :
157 : static const struct message rttype_str[] = {{RTN_UNSPEC, "none"},
158 : {RTN_UNICAST, "unicast"},
159 : {RTN_LOCAL, "local"},
160 : {RTN_BROADCAST, "broadcast"},
161 : {RTN_ANYCAST, "anycast"},
162 : {RTN_MULTICAST, "multicast"},
163 : {RTN_BLACKHOLE, "blackhole"},
164 : {RTN_UNREACHABLE, "unreachable"},
165 : {RTN_PROHIBIT, "prohibited"},
166 : {RTN_THROW, "throw"},
167 : {RTN_NAT, "nat"},
168 : {RTN_XRESOLVE, "resolver"},
169 : {0}};
170 :
171 : extern struct thread_master *master;
172 :
173 : extern struct zebra_privs_t zserv_privs;
174 :
175 9 : DEFINE_MTYPE_STATIC(ZEBRA, NL_BUF, "Zebra Netlink buffers");
176 :
177 : /* Hashtable and mutex to allow lookup of nlsock structs by socket/fd value.
178 : * We have both the main and dplane pthreads using these structs, so we have
179 : * to protect the hash with a lock.
180 : */
181 : static struct hash *nlsock_hash;
182 : pthread_mutex_t nlsock_mutex;
183 :
184 : /* Lock and unlock wrappers for nlsock hash */
185 : #define NLSOCK_LOCK() pthread_mutex_lock(&nlsock_mutex)
186 : #define NLSOCK_UNLOCK() pthread_mutex_unlock(&nlsock_mutex)
187 :
188 : size_t nl_batch_tx_bufsize;
189 : char *nl_batch_tx_buf;
190 :
191 : _Atomic uint32_t nl_batch_bufsize = NL_DEFAULT_BATCH_BUFSIZE;
192 : _Atomic uint32_t nl_batch_send_threshold = NL_DEFAULT_BATCH_SEND_THRESHOLD;
193 :
194 : struct nl_batch {
195 : void *buf;
196 : size_t bufsiz;
197 : size_t limit;
198 :
199 : void *buf_head;
200 : size_t curlen;
201 : size_t msgcnt;
202 :
203 : const struct zebra_dplane_info *zns;
204 :
205 : struct dplane_ctx_list_head ctx_list;
206 :
207 : /*
208 : * Pointer to the queue of completed contexts outbound back
209 : * towards the dataplane module.
210 : */
211 : struct dplane_ctx_list_head *ctx_out_q;
212 : };
213 :
214 0 : int netlink_config_write_helper(struct vty *vty)
215 : {
216 0 : uint32_t size =
217 0 : atomic_load_explicit(&nl_batch_bufsize, memory_order_relaxed);
218 0 : uint32_t threshold = atomic_load_explicit(&nl_batch_send_threshold,
219 : memory_order_relaxed);
220 :
221 0 : if (size != NL_DEFAULT_BATCH_BUFSIZE
222 0 : || threshold != NL_DEFAULT_BATCH_SEND_THRESHOLD)
223 0 : vty_out(vty, "zebra kernel netlink batch-tx-buf %u %u\n", size,
224 : threshold);
225 :
226 0 : if (if_netlink_frr_protodown_r_bit_is_set())
227 0 : vty_out(vty, "zebra protodown reason-bit %u\n",
228 0 : if_netlink_get_frr_protodown_r_bit());
229 :
230 0 : return 0;
231 : }
232 :
233 0 : void netlink_set_batch_buffer_size(uint32_t size, uint32_t threshold, bool set)
234 : {
235 0 : if (!set) {
236 0 : size = NL_DEFAULT_BATCH_BUFSIZE;
237 0 : threshold = NL_DEFAULT_BATCH_SEND_THRESHOLD;
238 : }
239 :
240 0 : atomic_store_explicit(&nl_batch_bufsize, size, memory_order_relaxed);
241 0 : atomic_store_explicit(&nl_batch_send_threshold, threshold,
242 : memory_order_relaxed);
243 0 : }
244 :
245 0 : int netlink_talk_filter(struct nlmsghdr *h, ns_id_t ns_id, int startup)
246 : {
247 : /*
248 : * This is an error condition that must be handled during
249 : * development.
250 : *
251 : * The netlink_talk_filter function is used for communication
252 : * down the netlink_cmd pipe and we are expecting
253 : * an ack being received. So if we get here
254 : * then we did not receive the ack and instead
255 : * received some other message in an unexpected
256 : * way.
257 : */
258 0 : zlog_debug("%s: ignoring message type 0x%04x(%s) NS %u", __func__,
259 : h->nlmsg_type, nl_msg_type_to_str(h->nlmsg_type), ns_id);
260 0 : return 0;
261 : }
262 :
263 12 : static int netlink_recvbuf(struct nlsock *nl, uint32_t newsize)
264 : {
265 12 : uint32_t oldsize;
266 12 : socklen_t newlen = sizeof(newsize);
267 12 : socklen_t oldlen = sizeof(oldsize);
268 12 : int ret;
269 :
270 12 : ret = getsockopt(nl->sock, SOL_SOCKET, SO_RCVBUF, &oldsize, &oldlen);
271 12 : if (ret < 0) {
272 0 : flog_err_sys(EC_LIB_SOCKET,
273 : "Can't get %s receive buffer size: %s", nl->name,
274 : safe_strerror(errno));
275 0 : return -1;
276 : }
277 :
278 : /* Try force option (linux >= 2.6.14) and fall back to normal set */
279 12 : frr_with_privs(&zserv_privs) {
280 12 : ret = setsockopt(nl->sock, SOL_SOCKET, SO_RCVBUFFORCE,
281 : &rcvbufsize, sizeof(rcvbufsize));
282 : }
283 12 : if (ret < 0)
284 12 : ret = setsockopt(nl->sock, SOL_SOCKET, SO_RCVBUF, &rcvbufsize,
285 : sizeof(rcvbufsize));
286 12 : if (ret < 0) {
287 0 : flog_err_sys(EC_LIB_SOCKET,
288 : "Can't set %s receive buffer size: %s", nl->name,
289 : safe_strerror(errno));
290 0 : return -1;
291 : }
292 :
293 12 : ret = getsockopt(nl->sock, SOL_SOCKET, SO_RCVBUF, &newsize, &newlen);
294 12 : if (ret < 0) {
295 0 : flog_err_sys(EC_LIB_SOCKET,
296 : "Can't get %s receive buffer size: %s", nl->name,
297 : safe_strerror(errno));
298 0 : return -1;
299 : }
300 : return 0;
301 : }
302 :
303 0 : static const char *group2str(uint32_t group)
304 : {
305 0 : switch (group) {
306 : case RTNLGRP_TUNNEL:
307 : return "RTNLGRP_TUNNEL";
308 0 : default:
309 0 : return "UNKNOWN";
310 : }
311 : }
312 :
313 : /* Make socket for Linux netlink interface. */
314 12 : static int netlink_socket(struct nlsock *nl, unsigned long groups,
315 : uint32_t ext_groups[], uint8_t ext_group_size,
316 : ns_id_t ns_id)
317 : {
318 12 : int ret;
319 12 : struct sockaddr_nl snl;
320 12 : int sock;
321 12 : int namelen;
322 :
323 24 : frr_with_privs(&zserv_privs) {
324 12 : sock = ns_socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE, ns_id);
325 12 : if (sock < 0) {
326 0 : zlog_err("Can't open %s socket: %s", nl->name,
327 : safe_strerror(errno));
328 0 : return -1;
329 : }
330 :
331 12 : memset(&snl, 0, sizeof(snl));
332 12 : snl.nl_family = AF_NETLINK;
333 12 : snl.nl_groups = groups;
334 :
335 12 : if (ext_group_size) {
336 : uint8_t i;
337 :
338 6 : for (i = 0; i < ext_group_size; i++) {
339 : #if defined SOL_NETLINK
340 6 : ret = setsockopt(sock, SOL_NETLINK,
341 : NETLINK_ADD_MEMBERSHIP,
342 3 : &ext_groups[i],
343 : sizeof(ext_groups[i]));
344 3 : if (ret < 0) {
345 3 : zlog_notice(
346 : "can't setsockopt NETLINK_ADD_MEMBERSHIP for group %s(%u), this linux kernel does not support it: %s(%d)",
347 : group2str(ext_groups[i]),
348 : ext_groups[i],
349 : safe_strerror(errno), errno);
350 : }
351 : #else
352 : zlog_notice(
353 : "Unable to use NETLINK_ADD_MEMBERSHIP via SOL_NETLINK for %s(%u) since the linux kernel does not support the socket option",
354 : group2str(ext_groups[i]),
355 : ext_groups[i]);
356 : #endif
357 : }
358 : }
359 :
360 : /* Bind the socket to the netlink structure for anything. */
361 12 : ret = bind(sock, (struct sockaddr *)&snl, sizeof(snl));
362 : }
363 :
364 12 : if (ret < 0) {
365 0 : zlog_err("Can't bind %s socket to group 0x%x: %s", nl->name,
366 : snl.nl_groups, safe_strerror(errno));
367 0 : close(sock);
368 0 : return -1;
369 : }
370 :
371 : /* multiple netlink sockets will have different nl_pid */
372 12 : namelen = sizeof(snl);
373 12 : ret = getsockname(sock, (struct sockaddr *)&snl, (socklen_t *)&namelen);
374 12 : if (ret < 0 || namelen != sizeof(snl)) {
375 0 : flog_err_sys(EC_LIB_SOCKET, "Can't get %s socket name: %s",
376 : nl->name, safe_strerror(errno));
377 0 : close(sock);
378 0 : return -1;
379 : }
380 :
381 12 : nl->snl = snl;
382 12 : nl->sock = sock;
383 12 : nl->buflen = NL_RCV_PKT_BUF_SIZE;
384 12 : nl->buf = XMALLOC(MTYPE_NL_BUF, nl->buflen);
385 :
386 12 : return ret;
387 : }
388 :
389 : /*
390 : * Dispatch an incoming netlink message; used by the zebra main pthread's
391 : * netlink event reader.
392 : */
393 34 : static int netlink_information_fetch(struct nlmsghdr *h, ns_id_t ns_id,
394 : int startup)
395 : {
396 : /*
397 : * When we handle new message types here
398 : * because we are starting to install them
399 : * then lets check the netlink_install_filter
400 : * and see if we should add the corresponding
401 : * allow through entry there.
402 : * Probably not needed to do but please
403 : * think about it.
404 : */
405 34 : switch (h->nlmsg_type) {
406 9 : case RTM_NEWROUTE:
407 9 : return netlink_route_change(h, ns_id, startup);
408 0 : case RTM_DELROUTE:
409 0 : return netlink_route_change(h, ns_id, startup);
410 14 : case RTM_NEWLINK:
411 14 : return netlink_link_change(h, ns_id, startup);
412 3 : case RTM_DELLINK:
413 3 : return netlink_link_change(h, ns_id, startup);
414 5 : case RTM_NEWNEIGH:
415 : case RTM_DELNEIGH:
416 : case RTM_GETNEIGH:
417 5 : return netlink_neigh_change(h, ns_id);
418 0 : case RTM_NEWRULE:
419 0 : return netlink_rule_change(h, ns_id, startup);
420 0 : case RTM_DELRULE:
421 0 : return netlink_rule_change(h, ns_id, startup);
422 0 : case RTM_NEWNEXTHOP:
423 0 : return netlink_nexthop_change(h, ns_id, startup);
424 0 : case RTM_DELNEXTHOP:
425 0 : return netlink_nexthop_change(h, ns_id, startup);
426 0 : case RTM_NEWQDISC:
427 : case RTM_DELQDISC:
428 0 : return netlink_qdisc_change(h, ns_id, startup);
429 0 : case RTM_NEWTCLASS:
430 : case RTM_DELTCLASS:
431 0 : return netlink_tclass_change(h, ns_id, startup);
432 0 : case RTM_NEWTFILTER:
433 : case RTM_DELTFILTER:
434 0 : return netlink_tfilter_change(h, ns_id, startup);
435 :
436 : /* Messages handled in the dplane thread */
437 : case RTM_NEWADDR:
438 : case RTM_DELADDR:
439 : case RTM_NEWNETCONF:
440 : case RTM_DELNETCONF:
441 : case RTM_NEWTUNNEL:
442 : case RTM_DELTUNNEL:
443 : case RTM_GETTUNNEL:
444 : return 0;
445 0 : default:
446 : /*
447 : * If we have received this message then
448 : * we have made a mistake during development
449 : * and we need to write some code to handle
450 : * this message type or not ask for
451 : * it to be sent up to us
452 : */
453 0 : flog_err(EC_ZEBRA_UNKNOWN_NLMSG,
454 : "Unknown netlink nlmsg_type %s(%d) vrf %u",
455 : nl_msg_type_to_str(h->nlmsg_type), h->nlmsg_type,
456 : ns_id);
457 0 : break;
458 : }
459 0 : return 0;
460 : }
461 :
462 : /*
463 : * Dispatch an incoming netlink message; used by the dataplane pthread's
464 : * netlink event reader code.
465 : */
466 84 : static int dplane_netlink_information_fetch(struct nlmsghdr *h, ns_id_t ns_id,
467 : int startup)
468 : {
469 : /*
470 : * Dispatch the incoming messages that the dplane pthread handles
471 : */
472 84 : switch (h->nlmsg_type) {
473 3 : case RTM_NEWADDR:
474 : case RTM_DELADDR:
475 3 : return netlink_interface_addr_dplane(h, ns_id, startup);
476 :
477 64 : case RTM_NEWNETCONF:
478 : case RTM_DELNETCONF:
479 64 : return netlink_netconf_change(h, ns_id, startup);
480 :
481 : /* TODO -- other messages for the dplane socket and pthread */
482 :
483 : case RTM_NEWLINK:
484 : case RTM_DELLINK:
485 :
486 : default:
487 : break;
488 : }
489 :
490 : return 0;
491 : }
492 :
493 14 : static void kernel_read(struct thread *thread)
494 : {
495 14 : struct zebra_ns *zns = (struct zebra_ns *)THREAD_ARG(thread);
496 14 : struct zebra_dplane_info dp_info;
497 :
498 : /* Capture key info from ns struct */
499 14 : zebra_dplane_info_from_zns(&dp_info, zns, false);
500 :
501 14 : netlink_parse_info(netlink_information_fetch, &zns->netlink, &dp_info,
502 : 5, false);
503 :
504 14 : thread_add_read(zrouter.master, kernel_read, zns, zns->netlink.sock,
505 : &zns->t_netlink);
506 14 : }
507 :
508 : /*
509 : * Called by the dplane pthread to read incoming OS messages and dispatch them.
510 : */
511 51 : int kernel_dplane_read(struct zebra_dplane_info *info)
512 : {
513 51 : struct nlsock *nl = kernel_netlink_nlsock_lookup(info->sock);
514 :
515 51 : netlink_parse_info(dplane_netlink_information_fetch, nl, info, 5,
516 : false);
517 :
518 51 : return 0;
519 : }
520 :
521 : /*
522 : * Filter out messages from self that occur on listener socket,
523 : * caused by our actions on the command socket(s)
524 : *
525 : * When we add new Netlink message types we probably
526 : * do not need to add them here as that we are filtering
527 : * on the routes we actually care to receive( which is rarer
528 : * then the normal course of operations). We are intentionally
529 : * allowing some messages from ourselves through
530 : * ( I'm looking at you Interface based netlink messages )
531 : * so that we only have to write one way to handle incoming
532 : * address add/delete and xxxNETCONF changes.
533 : */
534 6 : static void netlink_install_filter(int sock, uint32_t pid, uint32_t dplane_pid)
535 : {
536 : /*
537 : * BPF_JUMP instructions and where you jump to are based upon
538 : * 0 as being the next statement. So count from 0. Writing
539 : * this down because every time I look at this I have to
540 : * re-remember it.
541 : */
542 6 : struct sock_filter filter[] = {
543 : /*
544 : * Logic:
545 : * if (nlmsg_pid == pid ||
546 : * nlmsg_pid == dplane_pid) {
547 : * if (the incoming nlmsg_type ==
548 : * RTM_NEWADDR || RTM_DELADDR || RTM_NEWNETCONF ||
549 : * RTM_DELNETCONF)
550 : * keep this message
551 : * else
552 : * skip this message
553 : * } else
554 : * keep this netlink message
555 : */
556 : /*
557 : * 0: Load the nlmsg_pid into the BPF register
558 : */
559 : BPF_STMT(BPF_LD | BPF_ABS | BPF_W,
560 : offsetof(struct nlmsghdr, nlmsg_pid)),
561 : /*
562 : * 1: Compare to pid
563 : */
564 6 : BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, htonl(pid), 1, 0),
565 : /*
566 : * 2: Compare to dplane pid
567 : */
568 6 : BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, htonl(dplane_pid), 0, 6),
569 : /*
570 : * 3: Load the nlmsg_type into BPF register
571 : */
572 : BPF_STMT(BPF_LD | BPF_ABS | BPF_H,
573 : offsetof(struct nlmsghdr, nlmsg_type)),
574 : /*
575 : * 4: Compare to RTM_NEWADDR
576 : */
577 6 : BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, htons(RTM_NEWADDR), 4, 0),
578 : /*
579 : * 5: Compare to RTM_DELADDR
580 : */
581 6 : BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, htons(RTM_DELADDR), 3, 0),
582 : /*
583 : * 6: Compare to RTM_NEWNETCONF
584 : */
585 6 : BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, htons(RTM_NEWNETCONF), 2,
586 : 0),
587 : /*
588 : * 7: Compare to RTM_DELNETCONF
589 : */
590 6 : BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, htons(RTM_DELNETCONF), 1,
591 : 0),
592 : /*
593 : * 8: This is the end state of we want to skip the
594 : * message
595 : */
596 : BPF_STMT(BPF_RET | BPF_K, 0),
597 : /* 9: This is the end state of we want to keep
598 : * the message
599 : */
600 : BPF_STMT(BPF_RET | BPF_K, 0xffff),
601 : };
602 :
603 6 : struct sock_fprog prog = {
604 : .len = array_size(filter), .filter = filter,
605 : };
606 :
607 6 : if (setsockopt(sock, SOL_SOCKET, SO_ATTACH_FILTER, &prog, sizeof(prog))
608 : < 0)
609 0 : flog_err_sys(EC_LIB_SOCKET, "Can't install socket filter: %s",
610 : safe_strerror(errno));
611 6 : }
612 :
/*
 * Index the attributes of an rtattr stream by type, ignoring flag bits.
 *
 * 'tb' must have room for max + 1 entries and is cleared first. The bits in
 * 'flags' are masked out of each attribute's type before it is used as the
 * index. Attributes whose masked type exceeds 'max' are skipped, and when a
 * type occurs more than once the FIRST occurrence is kept.
 *
 * 'rta' is the first attribute and 'len' the number of bytes available.
 */
void netlink_parse_rtattr_flags(struct rtattr **tb, int max, struct rtattr *rta,
				int len, unsigned short flags)
{
	memset(tb, 0, sizeof(struct rtattr *) * (max + 1));

	for (; RTA_OK(rta, len); rta = RTA_NEXT(rta, len)) {
		unsigned short attr_type = rta->rta_type & ~flags;

		if (attr_type > max || tb[attr_type])
			continue;

		tb[attr_type] = rta;
	}
}
626 :
/*
 * Index the attributes of an rtattr stream by type.
 *
 * 'tb' must have room for max + 1 entries and is cleared first. Attributes
 * with a type greater than 'max' are skipped, and when a type occurs more
 * than once the LAST occurrence is kept.
 *
 * 'rta' is the first attribute and 'len' the number of bytes available.
 */
void netlink_parse_rtattr(struct rtattr **tb, int max, struct rtattr *rta,
			  int len)
{
	memset(tb, 0, sizeof(struct rtattr *) * (max + 1));

	for (; RTA_OK(rta, len); rta = RTA_NEXT(rta, len)) {
		if (rta->rta_type > max)
			continue;

		tb[rta->rta_type] = rta;
	}
}
637 :
/**
 * netlink_parse_rtattr_nested() - Parses a nested route attribute
 * @tb:  Pointer to array for storing rtattr in; needs max + 1 entries.
 * @max: Max number to store.
 * @rta: Pointer to rtattr to look for nested items in.
 *
 * The payload of @rta is treated as an attribute stream of its own and
 * indexed with netlink_parse_rtattr().
 */
void netlink_parse_rtattr_nested(struct rtattr **tb, int max,
				 struct rtattr *rta)
{
	struct rtattr *inner = RTA_DATA(rta);
	int inner_len = RTA_PAYLOAD(rta);

	netlink_parse_rtattr(tb, max, inner, inner_len);
}
649 :
/*
 * Append 'len' raw bytes from 'data' to the end of netlink message 'n'.
 *
 * The data is placed at the aligned end of the message, the padding up to
 * the next alignment boundary is zeroed, and nlmsg_len is advanced by the
 * aligned length.
 *
 * Returns false (and logs an error) when the result would not fit in
 * 'maxlen' bytes; the message is left untouched in that case.
 */
bool nl_addraw_l(struct nlmsghdr *n, unsigned int maxlen, const void *data,
		 unsigned int len)
{
	uint8_t *dst;

	if (NLMSG_ALIGN(n->nlmsg_len) + NLMSG_ALIGN(len) > maxlen) {
		zlog_err("ERROR message exceeded bound of %d", maxlen);
		return false;
	}

	dst = (uint8_t *)NLMSG_TAIL(n);
	memcpy(dst, data, len);
	/* Zero the alignment padding behind the copied data. */
	memset(dst + len, 0, NLMSG_ALIGN(len) - len);

	n->nlmsg_len = NLMSG_ALIGN(n->nlmsg_len) + NLMSG_ALIGN(len);

	return true;
}
664 :
/*
 * Append an attribute of the given 'type' with 'alen' bytes of payload to
 * netlink message 'n'.
 *
 * 'data' may be NULL only when 'alen' is 0 (this is asserted). On success
 * nlmsg_len is advanced past the aligned attribute.
 *
 * Returns false, leaving the message untouched, when the attribute would not
 * fit in 'maxlen' bytes; true otherwise.
 */
bool nl_attr_put(struct nlmsghdr *n, unsigned int maxlen, int type,
		 const void *data, unsigned int alen)
{
	const int attr_len = RTA_LENGTH(alen);
	const unsigned int offset = NLMSG_ALIGN(n->nlmsg_len);
	const unsigned int new_len = offset + RTA_ALIGN(attr_len);
	struct rtattr *attr;

	if (new_len > maxlen)
		return false;

	attr = (struct rtattr *)((char *)n + offset);
	attr->rta_type = type;
	attr->rta_len = attr_len;

	if (!data)
		assert(alen == 0);
	else
		memcpy(RTA_DATA(attr), data, alen);

	n->nlmsg_len = new_len;

	return true;
}
689 :
/*
 * Append an attribute carrying a single uint8_t to message 'n'.
 * Returns the result of nl_attr_put().
 */
bool nl_attr_put8(struct nlmsghdr *n, unsigned int maxlen, int type,
		  uint8_t data)
{
	return nl_attr_put(n, maxlen, type, &data, sizeof(data));
}
695 :
/*
 * Append an attribute carrying a single uint16_t to message 'n'.
 * Returns the result of nl_attr_put().
 */
bool nl_attr_put16(struct nlmsghdr *n, unsigned int maxlen, int type,
		   uint16_t data)
{
	return nl_attr_put(n, maxlen, type, &data, sizeof(data));
}
701 :
/*
 * Append an attribute carrying a single uint32_t to message 'n'.
 * Returns the result of nl_attr_put().
 */
bool nl_attr_put32(struct nlmsghdr *n, unsigned int maxlen, int type,
		   uint32_t data)
{
	return nl_attr_put(n, maxlen, type, &data, sizeof(data));
}
707 :
708 0 : struct rtattr *nl_attr_nest(struct nlmsghdr *n, unsigned int maxlen, int type)
709 : {
710 0 : struct rtattr *nest = NLMSG_TAIL(n);
711 :
712 0 : if (!nl_attr_put(n, maxlen, type, NULL, 0))
713 : return NULL;
714 :
715 0 : nest->rta_type |= NLA_F_NESTED;
716 0 : return nest;
717 : }
718 :
719 0 : int nl_attr_nest_end(struct nlmsghdr *n, struct rtattr *nest)
720 : {
721 0 : nest->rta_len = (uint8_t *)NLMSG_TAIL(n) - (uint8_t *)nest;
722 0 : return n->nlmsg_len;
723 : }
724 :
725 0 : struct rtnexthop *nl_attr_rtnh(struct nlmsghdr *n, unsigned int maxlen)
726 : {
727 0 : struct rtnexthop *rtnh = (struct rtnexthop *)NLMSG_TAIL(n);
728 :
729 0 : if (NLMSG_ALIGN(n->nlmsg_len) + RTNH_ALIGN(sizeof(struct rtnexthop))
730 0 : > maxlen)
731 : return NULL;
732 :
733 0 : memset(rtnh, 0, sizeof(struct rtnexthop));
734 0 : n->nlmsg_len =
735 0 : NLMSG_ALIGN(n->nlmsg_len) + RTA_ALIGN(sizeof(struct rtnexthop));
736 :
737 0 : return rtnh;
738 : }
739 :
740 0 : void nl_attr_rtnh_end(struct nlmsghdr *n, struct rtnexthop *rtnh)
741 : {
742 0 : rtnh->rtnh_len = (uint8_t *)NLMSG_TAIL(n) - (uint8_t *)rtnh;
743 0 : }
744 :
/*
 * Append a sub-attribute of the given 'type' with 'alen' bytes of payload to
 * the end of attribute 'rta', growing rta->rta_len accordingly.
 *
 * 'data' is only read when 'alen' is non-zero.
 *
 * Returns false (and logs an error) when the grown attribute would exceed
 * 'maxlen' bytes; 'rta' is left untouched in that case.
 */
bool nl_rta_put(struct rtattr *rta, unsigned int maxlen, int type,
		const void *data, int alen)
{
	const int sub_len = RTA_LENGTH(alen);
	struct rtattr *subrta;

	if (RTA_ALIGN(rta->rta_len) + RTA_ALIGN(sub_len) > maxlen) {
		zlog_err("ERROR max allowed bound %d exceeded for rtattr",
			 maxlen);
		return false;
	}

	/* The sub-attribute starts at the aligned end of the parent. */
	subrta = (struct rtattr *)((char *)rta + RTA_ALIGN(rta->rta_len));
	subrta->rta_type = type;
	subrta->rta_len = sub_len;

	if (alen != 0)
		memcpy(RTA_DATA(subrta), data, alen);

	rta->rta_len = NLMSG_ALIGN(rta->rta_len) + RTA_ALIGN(sub_len);

	return true;
}
765 :
/*
 * Append a sub-attribute carrying a single uint16_t to attribute 'rta'.
 * Returns the result of nl_rta_put().
 */
bool nl_rta_put16(struct rtattr *rta, unsigned int maxlen, int type,
		  uint16_t data)
{
	return nl_rta_put(rta, maxlen, type, &data, sizeof(data));
}
771 :
/*
 * Append a sub-attribute carrying a single uint64_t to attribute 'rta'.
 * Returns the result of nl_rta_put().
 */
bool nl_rta_put64(struct rtattr *rta, unsigned int maxlen, int type,
		  uint64_t data)
{
	return nl_rta_put(rta, maxlen, type, &data, sizeof(data));
}
777 :
778 0 : struct rtattr *nl_rta_nest(struct rtattr *rta, unsigned int maxlen, int type)
779 : {
780 0 : struct rtattr *nest = RTA_TAIL(rta);
781 :
782 0 : if (nl_rta_put(rta, maxlen, type, NULL, 0))
783 : return NULL;
784 :
785 0 : nest->rta_type |= NLA_F_NESTED;
786 :
787 0 : return nest;
788 : }
789 :
790 0 : int nl_rta_nest_end(struct rtattr *rta, struct rtattr *nest)
791 : {
792 0 : nest->rta_len = (uint8_t *)RTA_TAIL(rta) - (uint8_t *)nest;
793 :
794 0 : return rta->rta_len;
795 : }
796 :
797 12 : const char *nl_msg_type_to_str(uint16_t msg_type)
798 : {
799 0 : return lookup_msg(nlmsg_str, msg_type, "");
800 : }
801 :
802 0 : const char *nl_rtproto_to_str(uint8_t rtproto)
803 : {
804 0 : return lookup_msg(rtproto_str, rtproto, "");
805 : }
806 :
807 0 : const char *nl_family_to_str(uint8_t family)
808 : {
809 0 : return lookup_msg(family_str, family, "");
810 : }
811 :
812 0 : const char *nl_rttype_to_str(uint8_t rttype)
813 : {
814 0 : return lookup_msg(rttype_str, rttype, "");
815 : }
816 :
/*
 * Helpers for walking struct nlattr streams, analogous to the RTA_* macros
 * for struct rtattr.
 */

/* True when 'len' remaining bytes hold a complete attribute at 'nla'. */
#define NLA_OK(nla, len)                                                       \
	((len) >= (int)sizeof(struct nlattr)                                   \
	 && (nla)->nla_len >= sizeof(struct nlattr)                            \
	 && (nla)->nla_len <= (len))

/* Advance to the next attribute, decrementing 'attrlen' as a side effect. */
#define NLA_NEXT(nla, attrlen)                                                 \
	((attrlen) -= NLA_ALIGN((nla)->nla_len),                               \
	 (struct nlattr *)(((char *)(nla)) + NLA_ALIGN((nla)->nla_len)))

/* Total attribute length for a payload of 'len' bytes. */
#define NLA_LENGTH(len) (NLA_ALIGN(sizeof(struct nlattr)) + (len))

/* Start of the payload of attribute 'nla'. */
#define NLA_DATA(nla) ((struct nlattr *)(((char *)(nla)) + NLA_LENGTH(0)))

/*
 * First attribute following a struct nlmsgerr: skip the (aligned) nlmsgerr
 * itself plus 'inner_len' (aligned) bytes of the echoed message.
 */
#define ERR_NLA(err, inner_len)                                                \
	((struct nlattr *)(((char *)(err))                                     \
			   + NLMSG_ALIGN(sizeof(struct nlmsgerr))              \
			   + NLMSG_ALIGN((inner_len))))
831 :
/*
 * Index the attributes of an nlattr stream by type.
 *
 * 'tb' is NOT cleared here; the caller must pass an array of max + 1 entries
 * that is already zeroed. Attributes with a type greater than 'max' are
 * skipped, and when a type occurs more than once the last occurrence is kept.
 */
static void netlink_parse_nlattr(struct nlattr **tb, int max,
				 struct nlattr *nla, int len)
{
	for (; NLA_OK(nla, len); nla = NLA_NEXT(nla, len)) {
		if (nla->nla_type > max)
			continue;

		tb[nla->nla_type] = nla;
	}
}
841 :
842 12 : static void netlink_parse_extended_ack(struct nlmsghdr *h)
843 : {
844 12 : struct nlattr *tb[NLMSGERR_ATTR_MAX + 1] = {};
845 12 : const struct nlmsgerr *err = (const struct nlmsgerr *)NLMSG_DATA(h);
846 12 : const struct nlmsghdr *err_nlh = NULL;
847 : /* Length not including nlmsghdr */
848 12 : uint32_t len = 0;
849 : /* Inner error netlink message length */
850 12 : uint32_t inner_len = 0;
851 12 : const char *msg = NULL;
852 12 : uint32_t off = 0;
853 :
854 12 : if (!(h->nlmsg_flags & NLM_F_CAPPED))
855 0 : inner_len = (uint32_t)NLMSG_PAYLOAD(&err->msg, 0);
856 :
857 12 : len = (uint32_t)(NLMSG_PAYLOAD(h, sizeof(struct nlmsgerr)) - inner_len);
858 :
859 12 : netlink_parse_nlattr(tb, NLMSGERR_ATTR_MAX, ERR_NLA(err, inner_len),
860 : len);
861 :
862 12 : if (tb[NLMSGERR_ATTR_MSG])
863 12 : msg = (const char *)NLA_DATA(tb[NLMSGERR_ATTR_MSG]);
864 :
865 12 : if (tb[NLMSGERR_ATTR_OFFS]) {
866 0 : off = *(uint32_t *)NLA_DATA(tb[NLMSGERR_ATTR_OFFS]);
867 :
868 0 : if (off > h->nlmsg_len) {
869 0 : zlog_err("Invalid offset for NLMSGERR_ATTR_OFFS");
870 0 : } else if (!(h->nlmsg_flags & NLM_F_CAPPED)) {
871 : /*
872 : * Header of failed message
873 : * we are not doing anything currently with it
874 : * but noticing it for later.
875 : */
876 0 : err_nlh = &err->msg;
877 0 : zlog_debug("%s: Received %s extended Ack", __func__,
878 : nl_msg_type_to_str(err_nlh->nlmsg_type));
879 : }
880 : }
881 :
882 12 : if (msg && *msg != '\0') {
883 12 : bool is_err = !!err->error;
884 :
885 12 : if (is_err)
886 12 : zlog_err("Extended Error: %s", msg);
887 : else
888 0 : flog_warn(EC_ZEBRA_NETLINK_EXTENDED_WARNING,
889 : "Extended Warning: %s", msg);
890 : }
891 12 : }
892 :
893 : /*
894 : * netlink_send_msg - send a netlink message of a certain size.
895 : *
896 : * Returns -1 on error. Otherwise, it returns the number of bytes sent.
897 : */
898 45 : static ssize_t netlink_send_msg(const struct nlsock *nl, void *buf,
899 : size_t buflen)
900 : {
901 45 : struct sockaddr_nl snl = {};
902 45 : struct iovec iov = {};
903 45 : struct msghdr msg = {};
904 45 : ssize_t status;
905 45 : int save_errno = 0;
906 :
907 45 : iov.iov_base = buf;
908 45 : iov.iov_len = buflen;
909 45 : msg.msg_name = &snl;
910 45 : msg.msg_namelen = sizeof(snl);
911 45 : msg.msg_iov = &iov;
912 45 : msg.msg_iovlen = 1;
913 :
914 45 : snl.nl_family = AF_NETLINK;
915 :
916 : /* Send message to netlink interface. */
917 90 : frr_with_privs(&zserv_privs) {
918 45 : status = sendmsg(nl->sock, &msg, 0);
919 45 : save_errno = errno;
920 : }
921 :
922 45 : if (IS_ZEBRA_DEBUG_KERNEL_MSGDUMP_SEND) {
923 0 : zlog_debug("%s: >> netlink message dump [sent]", __func__);
924 : #ifdef NETLINK_DEBUG
925 0 : nl_dump(buf, buflen);
926 : #else
927 : zlog_hexdump(buf, buflen);
928 : #endif /* NETLINK_DEBUG */
929 : }
930 :
931 45 : if (status == -1) {
932 0 : flog_err_sys(EC_LIB_SOCKET, "%s error: %s", __func__,
933 : safe_strerror(save_errno));
934 0 : return -1;
935 : }
936 :
937 : return status;
938 : }
939 :
940 : /*
941 : * netlink_recv_msg - receive a netlink message.
942 : *
943 : * Returns -1 on error, 0 if read would block or the number of bytes received.
944 : */
945 244 : static int netlink_recv_msg(struct nlsock *nl, struct msghdr *msg)
946 : {
947 244 : struct iovec iov;
948 244 : int status;
949 :
950 244 : iov.iov_base = nl->buf;
951 244 : iov.iov_len = nl->buflen;
952 244 : msg->msg_iov = &iov;
953 244 : msg->msg_iovlen = 1;
954 :
955 244 : do {
956 244 : int bytes;
957 :
958 244 : bytes = recv(nl->sock, NULL, 0, MSG_PEEK | MSG_TRUNC);
959 :
960 244 : if (bytes >= 0 && (size_t)bytes > nl->buflen) {
961 0 : nl->buf = XREALLOC(MTYPE_NL_BUF, nl->buf, bytes);
962 0 : nl->buflen = bytes;
963 0 : iov.iov_base = nl->buf;
964 0 : iov.iov_len = nl->buflen;
965 : }
966 :
967 244 : status = recvmsg(nl->sock, msg, 0);
968 244 : } while (status == -1 && errno == EINTR);
969 :
970 244 : if (status == -1) {
971 71 : if (errno == EWOULDBLOCK || errno == EAGAIN)
972 : return 0;
973 0 : flog_err(EC_ZEBRA_RECVMSG_OVERRUN, "%s recvmsg overrun: %s",
974 : nl->name, safe_strerror(errno));
975 : /*
976 : * In this case we are screwed. There is no good way to recover
977 : * zebra at this point.
978 : */
979 0 : exit(-1);
980 : }
981 :
982 173 : if (status == 0) {
983 0 : flog_err_sys(EC_LIB_SOCKET, "%s EOF", nl->name);
984 0 : return -1;
985 : }
986 :
987 173 : if (msg->msg_namelen != sizeof(struct sockaddr_nl)) {
988 0 : flog_err(EC_ZEBRA_NETLINK_LENGTH_ERROR,
989 : "%s sender address length error: length %d", nl->name,
990 : msg->msg_namelen);
991 0 : return -1;
992 : }
993 :
994 173 : if (IS_ZEBRA_DEBUG_KERNEL_MSGDUMP_RECV) {
995 0 : zlog_debug("%s: << netlink message dump [recv]", __func__);
996 : #ifdef NETLINK_DEBUG
997 0 : nl_dump(nl->buf, status);
998 : #else
999 : zlog_hexdump(nl->buf, status);
1000 : #endif /* NETLINK_DEBUG */
1001 : }
1002 :
1003 : return status;
1004 : }
1005 :
1006 : /*
1007 : * netlink_parse_error - parse a netlink error message
1008 : *
1009 : * Returns 1 if this message is acknowledgement, 0 if this error should be
1010 : * ignored, -1 otherwise.
1011 : */
1012 16 : static int netlink_parse_error(const struct nlsock *nl, struct nlmsghdr *h,
1013 : bool is_cmd, bool startup)
1014 : {
1015 16 : struct nlmsgerr *err = (struct nlmsgerr *)NLMSG_DATA(h);
1016 16 : int errnum = err->error;
1017 16 : int msg_type = err->msg.nlmsg_type;
1018 :
1019 16 : if (h->nlmsg_len < NLMSG_LENGTH(sizeof(struct nlmsgerr))) {
1020 0 : flog_err(EC_ZEBRA_NETLINK_LENGTH_ERROR,
1021 : "%s error: message truncated", nl->name);
1022 0 : return -1;
1023 : }
1024 :
1025 : /*
1026 : * Parse the extended information before we actually handle it. At this
1027 : * point in time we do not do anything other than report the issue.
1028 : */
1029 16 : if (h->nlmsg_flags & NLM_F_ACK_TLVS)
1030 12 : netlink_parse_extended_ack(h);
1031 :
1032 : /* If the error field is zero, then this is an ACK. */
1033 16 : if (err->error == 0) {
1034 0 : if (IS_ZEBRA_DEBUG_KERNEL) {
1035 0 : zlog_debug("%s: %s ACK: type=%s(%u), seq=%u, pid=%u",
1036 : __func__, nl->name,
1037 : nl_msg_type_to_str(err->msg.nlmsg_type),
1038 : err->msg.nlmsg_type, err->msg.nlmsg_seq,
1039 : err->msg.nlmsg_pid);
1040 : }
1041 :
1042 0 : return 1;
1043 : }
1044 :
1045 : /*
1046 : * Deal with errors that occur because of races in link handling
1047 : * or types are not supported in kernel.
1048 : */
1049 16 : if (is_cmd &&
1050 4 : ((msg_type == RTM_DELROUTE &&
1051 16 : (-errnum == ENODEV || -errnum == ESRCH)) ||
1052 4 : (msg_type == RTM_NEWROUTE &&
1053 4 : (-errnum == ENETDOWN || -errnum == EEXIST)) ||
1054 12 : ((msg_type == RTM_NEWTUNNEL || msg_type == RTM_DELTUNNEL ||
1055 0 : msg_type == RTM_GETTUNNEL) &&
1056 : (-errnum == EOPNOTSUPP)))) {
1057 4 : if (IS_ZEBRA_DEBUG_KERNEL)
1058 0 : zlog_debug("%s: error: %s type=%s(%u), seq=%u, pid=%u",
1059 : nl->name, safe_strerror(-errnum),
1060 : nl_msg_type_to_str(msg_type), msg_type,
1061 : err->msg.nlmsg_seq, err->msg.nlmsg_pid);
1062 4 : return 0;
1063 : }
1064 :
1065 : /*
1066 : * We see RTM_DELNEIGH when shutting down an interface with an IPv4
1067 : * link-local. The kernel should have already deleted the neighbor so
1068 : * do not log these as an error.
1069 : */
1070 12 : if (msg_type == RTM_DELNEIGH
1071 12 : || (is_cmd && msg_type == RTM_NEWROUTE
1072 4 : && (-errnum == ESRCH || -errnum == ENETUNREACH))) {
1073 : /*
1074 : * This is known to happen in some situations, don't log as
1075 : * error.
1076 : */
1077 0 : if (IS_ZEBRA_DEBUG_KERNEL)
1078 0 : zlog_debug("%s error: %s, type=%s(%u), seq=%u, pid=%u",
1079 : nl->name, safe_strerror(-errnum),
1080 : nl_msg_type_to_str(msg_type), msg_type,
1081 : err->msg.nlmsg_seq, err->msg.nlmsg_pid);
1082 : } else {
1083 12 : if ((msg_type != RTM_GETNEXTHOP) || !startup)
1084 12 : flog_err(EC_ZEBRA_UNEXPECTED_MESSAGE,
1085 : "%s error: %s, type=%s(%u), seq=%u, pid=%u",
1086 : nl->name, safe_strerror(-errnum),
1087 : nl_msg_type_to_str(msg_type), msg_type,
1088 : err->msg.nlmsg_seq, err->msg.nlmsg_pid);
1089 : }
1090 :
1091 : return -1;
1092 : }
1093 :
1094 : /*
1095 : * netlink_parse_info
1096 : *
1097 : * Receive message from netlink interface and pass those information
1098 : * to the given function.
1099 : *
1100 : * filter -> Function to call to read the results
1101 : * nl -> netlink socket information
1102 : * zns -> The zebra namespace data
1103 : * count -> How many we should read in, 0 means as much as possible
1104 : * startup -> Are we reading in under startup conditions? passed to
1105 : * the filter.
1106 : */
1107 95 : int netlink_parse_info(int (*filter)(struct nlmsghdr *, ns_id_t, int),
1108 : struct nlsock *nl, const struct zebra_dplane_info *zns,
1109 : int count, bool startup)
1110 : {
1111 95 : int status;
1112 95 : int ret = 0;
1113 95 : int error;
1114 95 : int read_in = 0;
1115 :
1116 219 : while (1) {
1117 219 : struct sockaddr_nl snl;
1118 219 : struct msghdr msg = {.msg_name = (void *)&snl,
1119 : .msg_namelen = sizeof(snl)};
1120 219 : struct nlmsghdr *h;
1121 :
1122 219 : if (count && read_in >= count)
1123 36 : return 0;
1124 :
1125 216 : status = netlink_recv_msg(nl, &msg);
1126 216 : if (status == -1)
1127 : return -1;
1128 216 : else if (status == 0)
1129 : break;
1130 :
1131 157 : read_in++;
1132 157 : for (h = (struct nlmsghdr *)nl->buf;
1133 362 : (status >= 0 && NLMSG_OK(h, (unsigned int)status));
1134 205 : h = NLMSG_NEXT(h, status)) {
1135 : /* Finish of reading. */
1136 238 : if (h->nlmsg_type == NLMSG_DONE)
1137 33 : return ret;
1138 :
1139 : /* Error handling. */
1140 205 : if (h->nlmsg_type == NLMSG_ERROR) {
1141 0 : int err = netlink_parse_error(
1142 0 : nl, h, zns->is_cmd, startup);
1143 :
1144 0 : if (err == 1) {
1145 0 : if (!(h->nlmsg_flags & NLM_F_MULTI))
1146 : return 0;
1147 0 : continue;
1148 : } else
1149 0 : return err;
1150 : }
1151 :
1152 : /*
1153 : * What is the right thing to do? The kernel
1154 : * is telling us that the dump request was interrupted
1155 : * and we more than likely are out of luck and have
1156 : * missed data from the kernel. At this point in time
1157 : * lets just note that this is happening.
1158 : */
1159 205 : if (h->nlmsg_flags & NLM_F_DUMP_INTR)
1160 0 : flog_err(
1161 : EC_ZEBRA_NETLINK_BAD_SEQUENCE,
1162 : "netlink recvmsg: The Dump request was interrupted");
1163 :
1164 : /* OK we got netlink message. */
1165 205 : if (IS_ZEBRA_DEBUG_KERNEL)
1166 0 : zlog_debug(
1167 : "%s: %s type %s(%u), len=%d, seq=%u, pid=%u",
1168 : __func__, nl->name,
1169 : nl_msg_type_to_str(h->nlmsg_type),
1170 : h->nlmsg_type, h->nlmsg_len,
1171 : h->nlmsg_seq, h->nlmsg_pid);
1172 :
1173 :
1174 : /*
1175 : * Ignore messages that maybe sent from
1176 : * other actors besides the kernel
1177 : */
1178 205 : if (snl.nl_pid != 0) {
1179 0 : zlog_debug("Ignoring message from pid %u",
1180 : snl.nl_pid);
1181 0 : continue;
1182 : }
1183 :
1184 205 : error = (*filter)(h, zns->ns_id, startup);
1185 205 : if (error < 0) {
1186 0 : zlog_debug("%s filter function error",
1187 : nl->name);
1188 0 : ret = error;
1189 : }
1190 : }
1191 :
1192 : /* After error care. */
1193 124 : if (msg.msg_flags & MSG_TRUNC) {
1194 0 : flog_err(EC_ZEBRA_NETLINK_LENGTH_ERROR,
1195 : "%s error: message truncated", nl->name);
1196 0 : continue;
1197 : }
1198 124 : if (status) {
1199 0 : flog_err(EC_ZEBRA_NETLINK_LENGTH_ERROR,
1200 : "%s error: data remnant size %d", nl->name,
1201 : status);
1202 0 : return -1;
1203 : }
1204 : }
1205 59 : return ret;
1206 : }
1207 :
1208 : /*
1209 : * netlink_talk_info
1210 : *
1211 : * sendmsg() to netlink socket then recvmsg().
1212 : * Calls netlink_parse_info to parse returned data
1213 : *
1214 : * filter -> The filter to read final results from kernel
1215 : * nlmsghdr -> The data to send to the kernel
1216 : * dp_info -> The dataplane and netlink socket information
1217 : * startup -> Are we reading in under startup conditions
1218 : * This is passed through eventually to filter.
1219 : */
1220 0 : static int netlink_talk_info(int (*filter)(struct nlmsghdr *, ns_id_t,
1221 : int startup),
1222 : struct nlmsghdr *n,
1223 : struct zebra_dplane_info *dp_info, bool startup)
1224 : {
1225 0 : struct nlsock *nl;
1226 :
1227 0 : nl = kernel_netlink_nlsock_lookup(dp_info->sock);
1228 0 : n->nlmsg_seq = dp_info->seq;
1229 0 : n->nlmsg_pid = nl->snl.nl_pid;
1230 :
1231 0 : if (IS_ZEBRA_DEBUG_KERNEL)
1232 0 : zlog_debug(
1233 : "netlink_talk: %s type %s(%u), len=%d seq=%u flags 0x%x",
1234 : nl->name, nl_msg_type_to_str(n->nlmsg_type),
1235 : n->nlmsg_type, n->nlmsg_len, n->nlmsg_seq,
1236 : n->nlmsg_flags);
1237 :
1238 0 : if (netlink_send_msg(nl, n, n->nlmsg_len) == -1)
1239 : return -1;
1240 :
1241 : /*
1242 : * Get reply from netlink socket.
1243 : * The reply should either be an acknowlegement or an error.
1244 : */
1245 0 : return netlink_parse_info(filter, nl, dp_info, 0, startup);
1246 : }
1247 :
1248 : /*
1249 : * Synchronous version of netlink_talk_info. Converts args to suit the
1250 : * common version, which is suitable for both sync and async use.
1251 : */
1252 0 : int netlink_talk(int (*filter)(struct nlmsghdr *, ns_id_t, int startup),
1253 : struct nlmsghdr *n, struct nlsock *nl, struct zebra_ns *zns,
1254 : bool startup)
1255 : {
1256 0 : struct zebra_dplane_info dp_info;
1257 :
1258 : /* Increment sequence number before capturing snapshot of ns socket
1259 : * info.
1260 : */
1261 0 : nl->seq++;
1262 :
1263 : /* Capture info in intermediate info struct */
1264 0 : zebra_dplane_info_from_zns(&dp_info, zns, (nl == &(zns->netlink_cmd)));
1265 :
1266 0 : return netlink_talk_info(filter, n, &dp_info, startup);
1267 : }
1268 :
1269 : /* Issue request message to kernel via netlink socket. GET messages
1270 : * are issued through this interface.
1271 : */
1272 33 : int netlink_request(struct nlsock *nl, void *req)
1273 : {
1274 33 : struct nlmsghdr *n = (struct nlmsghdr *)req;
1275 :
1276 : /* Check netlink socket. */
1277 33 : if (nl->sock < 0) {
1278 0 : flog_err_sys(EC_LIB_SOCKET, "%s socket isn't active.",
1279 : nl->name);
1280 0 : return -1;
1281 : }
1282 :
1283 : /* Fill common fields for all requests. */
1284 33 : n->nlmsg_pid = nl->snl.nl_pid;
1285 33 : n->nlmsg_seq = ++nl->seq;
1286 :
1287 33 : if (netlink_send_msg(nl, req, n->nlmsg_len) == -1)
1288 : return -1;
1289 :
1290 : return 0;
1291 : }
1292 :
1293 12 : static int nl_batch_read_resp(struct nl_batch *bth)
1294 : {
1295 12 : struct nlmsghdr *h;
1296 12 : struct sockaddr_nl snl;
1297 12 : struct msghdr msg = {};
1298 12 : int status, seq;
1299 12 : struct nlsock *nl;
1300 12 : struct zebra_dplane_ctx *ctx;
1301 12 : bool ignore_msg;
1302 :
1303 12 : nl = kernel_netlink_nlsock_lookup(bth->zns->sock);
1304 :
1305 12 : msg.msg_name = (void *)&snl;
1306 12 : msg.msg_namelen = sizeof(snl);
1307 :
1308 : /*
1309 : * The responses are not batched, so we need to read and process one
1310 : * message at a time.
1311 : */
1312 28 : while (true) {
1313 28 : status = netlink_recv_msg(nl, &msg);
1314 : /*
1315 : * status == -1 is a full on failure somewhere
1316 : * since we don't know where the problem happened
1317 : * we must mark all as failed
1318 : *
1319 : * Else we mark everything as worked
1320 : *
1321 : */
1322 28 : if (status == -1 || status == 0) {
1323 65 : while ((ctx = dplane_ctx_dequeue(&(bth->ctx_list))) !=
1324 : NULL) {
1325 53 : if (status == -1)
1326 0 : dplane_ctx_set_status(
1327 : ctx,
1328 : ZEBRA_DPLANE_REQUEST_FAILURE);
1329 53 : dplane_ctx_enqueue_tail(bth->ctx_out_q, ctx);
1330 : }
1331 12 : return status;
1332 : }
1333 :
1334 16 : h = (struct nlmsghdr *)nl->buf;
1335 16 : ignore_msg = false;
1336 16 : seq = h->nlmsg_seq;
1337 : /*
1338 : * Find the corresponding context object. Received responses are
1339 : * in the same order as requests we sent, so we can simply
1340 : * iterate over the context list and match responses with
1341 : * requests at same time.
1342 : */
1343 29 : while (true) {
1344 29 : ctx = dplane_ctx_get_head(&(bth->ctx_list));
1345 29 : if (ctx == NULL) {
1346 : /*
1347 : * This is a situation where we have gotten
1348 : * into a bad spot. We need to know that
1349 : * this happens( does it? )
1350 : */
1351 0 : zlog_err(
1352 : "%s:WARNING Received netlink Response for an error and no Contexts to associate with it",
1353 : __func__);
1354 0 : break;
1355 : }
1356 :
1357 : /*
1358 : * 'update' context objects take two consecutive
1359 : * sequence numbers.
1360 : */
1361 29 : if (dplane_ctx_is_update(ctx) &&
1362 0 : dplane_ctx_get_ns(ctx)->seq + 1 == seq) {
1363 : /*
1364 : * This is the situation where we get a response
1365 : * to a message that should be ignored.
1366 : */
1367 : ignore_msg = true;
1368 : break;
1369 : }
1370 :
1371 29 : ctx = dplane_ctx_dequeue(&(bth->ctx_list));
1372 29 : dplane_ctx_enqueue_tail(bth->ctx_out_q, ctx);
1373 :
1374 : /* We have found corresponding context object. */
1375 29 : if (dplane_ctx_get_ns(ctx)->seq == seq)
1376 : break;
1377 :
1378 13 : if (dplane_ctx_get_ns(ctx)->seq > seq)
1379 0 : zlog_warn(
1380 : "%s:WARNING Received %u is less than any context on the queue ctx->seq %u",
1381 : __func__, seq,
1382 : dplane_ctx_get_ns(ctx)->seq);
1383 : }
1384 :
1385 16 : if (ignore_msg) {
1386 : /*
1387 : * If we ignore the message due to an update
1388 : * above we should still fricking decode the
1389 : * message for our operator to understand
1390 : * what is going on
1391 : */
1392 0 : int err = netlink_parse_error(nl, h, bth->zns->is_cmd,
1393 : false);
1394 :
1395 0 : zlog_debug("%s: netlink error message seq=%d %d",
1396 : __func__, h->nlmsg_seq, err);
1397 0 : continue;
1398 : }
1399 :
1400 : /*
1401 : * We received a message with the sequence number that isn't
1402 : * associated with any dplane context object.
1403 : */
1404 16 : if (ctx == NULL) {
1405 0 : if (IS_ZEBRA_DEBUG_KERNEL)
1406 0 : zlog_debug(
1407 : "%s: skipping unassociated response, seq number %d NS %u",
1408 : __func__, h->nlmsg_seq,
1409 : bth->zns->ns_id);
1410 0 : continue;
1411 : }
1412 :
1413 16 : if (h->nlmsg_type == NLMSG_ERROR) {
1414 16 : int err = netlink_parse_error(nl, h, bth->zns->is_cmd,
1415 : false);
1416 :
1417 16 : if (err == -1)
1418 12 : dplane_ctx_set_status(
1419 : ctx, ZEBRA_DPLANE_REQUEST_FAILURE);
1420 :
1421 16 : if (IS_ZEBRA_DEBUG_KERNEL)
1422 0 : zlog_debug("%s: netlink error message seq=%d ",
1423 : __func__, h->nlmsg_seq);
1424 16 : continue;
1425 : }
1426 :
1427 : /*
1428 : * If we get here then we did not receive neither the ack nor
1429 : * the error and instead received some other message in an
1430 : * unexpected way.
1431 : */
1432 0 : if (IS_ZEBRA_DEBUG_KERNEL)
1433 0 : zlog_debug("%s: ignoring message type 0x%04x(%s) NS %u",
1434 : __func__, h->nlmsg_type,
1435 : nl_msg_type_to_str(h->nlmsg_type),
1436 : bth->zns->ns_id);
1437 : }
1438 :
1439 : return 0;
1440 : }
1441 :
1442 30 : static void nl_batch_reset(struct nl_batch *bth)
1443 : {
1444 30 : bth->buf_head = bth->buf;
1445 30 : bth->curlen = 0;
1446 30 : bth->msgcnt = 0;
1447 30 : bth->zns = NULL;
1448 :
1449 30 : dplane_ctx_q_init(&(bth->ctx_list));
1450 : }
1451 :
1452 15 : static void nl_batch_init(struct nl_batch *bth,
1453 : struct dplane_ctx_list_head *ctx_out_q)
1454 : {
1455 : /*
1456 : * If the size of the buffer has changed, free and then allocate a new
1457 : * one.
1458 : */
1459 15 : size_t bufsize =
1460 15 : atomic_load_explicit(&nl_batch_bufsize, memory_order_relaxed);
1461 15 : if (bufsize != nl_batch_tx_bufsize) {
1462 3 : if (nl_batch_tx_buf)
1463 0 : XFREE(MTYPE_NL_BUF, nl_batch_tx_buf);
1464 :
1465 3 : nl_batch_tx_buf = XCALLOC(MTYPE_NL_BUF, bufsize);
1466 3 : nl_batch_tx_bufsize = bufsize;
1467 : }
1468 :
1469 15 : bth->buf = nl_batch_tx_buf;
1470 15 : bth->bufsiz = bufsize;
1471 15 : bth->limit = atomic_load_explicit(&nl_batch_send_threshold,
1472 : memory_order_relaxed);
1473 :
1474 15 : bth->ctx_out_q = ctx_out_q;
1475 :
1476 15 : nl_batch_reset(bth);
1477 15 : }
1478 :
1479 15 : static void nl_batch_send(struct nl_batch *bth)
1480 : {
1481 15 : struct zebra_dplane_ctx *ctx;
1482 15 : bool err = false;
1483 :
1484 15 : if (bth->curlen != 0 && bth->zns != NULL) {
1485 12 : struct nlsock *nl =
1486 12 : kernel_netlink_nlsock_lookup(bth->zns->sock);
1487 :
1488 12 : if (IS_ZEBRA_DEBUG_KERNEL)
1489 0 : zlog_debug("%s: %s, batch size=%zu, msg cnt=%zu",
1490 : __func__, nl->name, bth->curlen,
1491 : bth->msgcnt);
1492 :
1493 12 : if (netlink_send_msg(nl, bth->buf, bth->curlen) == -1)
1494 : err = true;
1495 :
1496 12 : if (!err) {
1497 12 : if (nl_batch_read_resp(bth) == -1)
1498 0 : err = true;
1499 : }
1500 : }
1501 :
1502 : /* Move remaining contexts to the outbound queue. */
1503 15 : while (true) {
1504 15 : ctx = dplane_ctx_dequeue(&(bth->ctx_list));
1505 15 : if (ctx == NULL)
1506 : break;
1507 :
1508 0 : if (err)
1509 0 : dplane_ctx_set_status(ctx,
1510 : ZEBRA_DPLANE_REQUEST_FAILURE);
1511 :
1512 0 : dplane_ctx_enqueue_tail(bth->ctx_out_q, ctx);
1513 : }
1514 :
1515 15 : nl_batch_reset(bth);
1516 15 : }
1517 :
1518 48 : enum netlink_msg_status netlink_batch_add_msg(
1519 : struct nl_batch *bth, struct zebra_dplane_ctx *ctx,
1520 : ssize_t (*msg_encoder)(struct zebra_dplane_ctx *, void *, size_t),
1521 : bool ignore_res)
1522 : {
1523 48 : int seq;
1524 48 : ssize_t size;
1525 48 : struct nlmsghdr *msgh;
1526 48 : struct nlsock *nl;
1527 :
1528 48 : size = (*msg_encoder)(ctx, bth->buf_head, bth->bufsiz - bth->curlen);
1529 :
1530 : /*
1531 : * If there was an error while encoding the message (other than buffer
1532 : * overflow) then return an error.
1533 : */
1534 48 : if (size < 0)
1535 : return FRR_NETLINK_ERROR;
1536 :
1537 : /*
1538 : * If the message doesn't fit entirely in the buffer then send the batch
1539 : * and retry.
1540 : */
1541 48 : if (size == 0) {
1542 0 : nl_batch_send(bth);
1543 0 : size = (*msg_encoder)(ctx, bth->buf_head,
1544 0 : bth->bufsiz - bth->curlen);
1545 : /*
1546 : * If the message doesn't fit in the empty buffer then just
1547 : * return an error.
1548 : */
1549 0 : if (size <= 0)
1550 : return FRR_NETLINK_ERROR;
1551 : }
1552 :
1553 48 : seq = dplane_ctx_get_ns(ctx)->seq;
1554 48 : nl = kernel_netlink_nlsock_lookup(dplane_ctx_get_ns_sock(ctx));
1555 :
1556 48 : if (ignore_res)
1557 0 : seq++;
1558 :
1559 48 : msgh = (struct nlmsghdr *)bth->buf_head;
1560 48 : msgh->nlmsg_seq = seq;
1561 48 : msgh->nlmsg_pid = nl->snl.nl_pid;
1562 :
1563 48 : bth->zns = dplane_ctx_get_ns(ctx);
1564 48 : bth->buf_head = ((char *)bth->buf_head) + size;
1565 48 : bth->curlen += size;
1566 48 : bth->msgcnt++;
1567 :
1568 48 : return FRR_NETLINK_QUEUED;
1569 : }
1570 :
1571 82 : static enum netlink_msg_status nl_put_msg(struct nl_batch *bth,
1572 : struct zebra_dplane_ctx *ctx)
1573 : {
1574 82 : if (dplane_ctx_is_skip_kernel(ctx))
1575 : return FRR_NETLINK_SUCCESS;
1576 :
1577 82 : switch (dplane_ctx_get_op(ctx)) {
1578 :
1579 46 : case DPLANE_OP_ROUTE_INSTALL:
1580 : case DPLANE_OP_ROUTE_UPDATE:
1581 : case DPLANE_OP_ROUTE_DELETE:
1582 46 : return netlink_put_route_update_msg(bth, ctx);
1583 :
1584 36 : case DPLANE_OP_NH_INSTALL:
1585 : case DPLANE_OP_NH_UPDATE:
1586 : case DPLANE_OP_NH_DELETE:
1587 36 : return netlink_put_nexthop_update_msg(bth, ctx);
1588 :
1589 0 : case DPLANE_OP_LSP_INSTALL:
1590 : case DPLANE_OP_LSP_UPDATE:
1591 : case DPLANE_OP_LSP_DELETE:
1592 0 : return netlink_put_lsp_update_msg(bth, ctx);
1593 :
1594 0 : case DPLANE_OP_PW_INSTALL:
1595 : case DPLANE_OP_PW_UNINSTALL:
1596 0 : return netlink_put_pw_update_msg(bth, ctx);
1597 :
1598 0 : case DPLANE_OP_ADDR_INSTALL:
1599 : case DPLANE_OP_ADDR_UNINSTALL:
1600 0 : return netlink_put_address_update_msg(bth, ctx);
1601 :
1602 0 : case DPLANE_OP_MAC_INSTALL:
1603 : case DPLANE_OP_MAC_DELETE:
1604 0 : return netlink_put_mac_update_msg(bth, ctx);
1605 :
1606 0 : case DPLANE_OP_NEIGH_INSTALL:
1607 : case DPLANE_OP_NEIGH_UPDATE:
1608 : case DPLANE_OP_NEIGH_DELETE:
1609 : case DPLANE_OP_VTEP_ADD:
1610 : case DPLANE_OP_VTEP_DELETE:
1611 : case DPLANE_OP_NEIGH_DISCOVER:
1612 : case DPLANE_OP_NEIGH_IP_INSTALL:
1613 : case DPLANE_OP_NEIGH_IP_DELETE:
1614 : case DPLANE_OP_NEIGH_TABLE_UPDATE:
1615 0 : return netlink_put_neigh_update_msg(bth, ctx);
1616 :
1617 0 : case DPLANE_OP_RULE_ADD:
1618 : case DPLANE_OP_RULE_DELETE:
1619 : case DPLANE_OP_RULE_UPDATE:
1620 0 : return netlink_put_rule_update_msg(bth, ctx);
1621 :
1622 : case DPLANE_OP_SYS_ROUTE_ADD:
1623 : case DPLANE_OP_SYS_ROUTE_DELETE:
1624 : case DPLANE_OP_ROUTE_NOTIFY:
1625 : case DPLANE_OP_LSP_NOTIFY:
1626 : case DPLANE_OP_BR_PORT_UPDATE:
1627 : return FRR_NETLINK_SUCCESS;
1628 :
1629 : case DPLANE_OP_IPTABLE_ADD:
1630 : case DPLANE_OP_IPTABLE_DELETE:
1631 : case DPLANE_OP_IPSET_ADD:
1632 : case DPLANE_OP_IPSET_DELETE:
1633 : case DPLANE_OP_IPSET_ENTRY_ADD:
1634 : case DPLANE_OP_IPSET_ENTRY_DELETE:
1635 : return FRR_NETLINK_ERROR;
1636 :
1637 0 : case DPLANE_OP_GRE_SET:
1638 0 : return netlink_put_gre_set_msg(bth, ctx);
1639 :
1640 : case DPLANE_OP_INTF_ADDR_ADD:
1641 : case DPLANE_OP_INTF_ADDR_DEL:
1642 : case DPLANE_OP_NONE:
1643 : return FRR_NETLINK_ERROR;
1644 :
1645 0 : case DPLANE_OP_INTF_NETCONFIG:
1646 0 : return netlink_put_intf_netconfig(bth, ctx);
1647 :
1648 0 : case DPLANE_OP_INTF_INSTALL:
1649 : case DPLANE_OP_INTF_UPDATE:
1650 : case DPLANE_OP_INTF_DELETE:
1651 0 : return netlink_put_intf_update_msg(bth, ctx);
1652 :
1653 0 : case DPLANE_OP_TC_QDISC_INSTALL:
1654 : case DPLANE_OP_TC_QDISC_UNINSTALL:
1655 0 : return netlink_put_tc_qdisc_update_msg(bth, ctx);
1656 0 : case DPLANE_OP_TC_CLASS_ADD:
1657 : case DPLANE_OP_TC_CLASS_DELETE:
1658 : case DPLANE_OP_TC_CLASS_UPDATE:
1659 0 : return netlink_put_tc_class_update_msg(bth, ctx);
1660 0 : case DPLANE_OP_TC_FILTER_ADD:
1661 : case DPLANE_OP_TC_FILTER_DELETE:
1662 : case DPLANE_OP_TC_FILTER_UPDATE:
1663 0 : return netlink_put_tc_filter_update_msg(bth, ctx);
1664 : }
1665 :
1666 : return FRR_NETLINK_ERROR;
1667 : }
1668 :
1669 15 : void kernel_update_multi(struct dplane_ctx_list_head *ctx_list)
1670 : {
1671 15 : struct nl_batch batch;
1672 15 : struct zebra_dplane_ctx *ctx;
1673 15 : struct dplane_ctx_list_head handled_list;
1674 15 : enum netlink_msg_status res;
1675 :
1676 15 : dplane_ctx_q_init(&handled_list);
1677 15 : nl_batch_init(&batch, &handled_list);
1678 :
1679 97 : while (true) {
1680 97 : ctx = dplane_ctx_dequeue(ctx_list);
1681 97 : if (ctx == NULL)
1682 : break;
1683 :
1684 82 : if (batch.zns != NULL
1685 66 : && batch.zns->ns_id != dplane_ctx_get_ns(ctx)->ns_id)
1686 0 : nl_batch_send(&batch);
1687 :
1688 : /*
1689 : * Assume all messages will succeed and then mark only the ones
1690 : * that failed.
1691 : */
1692 82 : dplane_ctx_set_status(ctx, ZEBRA_DPLANE_REQUEST_SUCCESS);
1693 :
1694 82 : res = nl_put_msg(&batch, ctx);
1695 :
1696 82 : dplane_ctx_enqueue_tail(&(batch.ctx_list), ctx);
1697 82 : if (res == FRR_NETLINK_ERROR)
1698 0 : dplane_ctx_set_status(ctx,
1699 : ZEBRA_DPLANE_REQUEST_FAILURE);
1700 :
1701 82 : if (batch.curlen > batch.limit)
1702 0 : nl_batch_send(&batch);
1703 : }
1704 :
1705 15 : nl_batch_send(&batch);
1706 :
1707 15 : dplane_ctx_q_init(ctx_list);
1708 15 : dplane_ctx_list_append(ctx_list, &handled_list);
1709 15 : }
1710 :
1711 174 : struct nlsock *kernel_netlink_nlsock_lookup(int sock)
1712 : {
1713 174 : struct nlsock lookup, *retval;
1714 :
1715 174 : lookup.sock = sock;
1716 :
1717 174 : NLSOCK_LOCK();
1718 174 : retval = hash_lookup(nlsock_hash, &lookup);
1719 174 : NLSOCK_UNLOCK();
1720 :
1721 174 : return retval;
1722 : }
1723 :
1724 : /* Insert nlsock entry into hash */
1725 12 : static void kernel_netlink_nlsock_insert(struct nlsock *nls)
1726 : {
1727 12 : NLSOCK_LOCK();
1728 12 : (void)hash_get(nlsock_hash, nls, hash_alloc_intern);
1729 12 : NLSOCK_UNLOCK();
1730 12 : }
1731 :
1732 : /* Remove nlsock entry from hash */
1733 12 : static void kernel_netlink_nlsock_remove(struct nlsock *nls)
1734 : {
1735 12 : NLSOCK_LOCK();
1736 12 : (void)hash_release(nlsock_hash, nls);
1737 12 : NLSOCK_UNLOCK();
1738 12 : }
1739 :
1740 198 : static uint32_t kernel_netlink_nlsock_key(const void *arg)
1741 : {
1742 198 : const struct nlsock *nl = arg;
1743 :
1744 198 : return nl->sock;
1745 : }
1746 :
1747 186 : static bool kernel_netlink_nlsock_hash_equal(const void *arg1, const void *arg2)
1748 : {
1749 186 : const struct nlsock *nl1 = arg1;
1750 186 : const struct nlsock *nl2 = arg2;
1751 :
1752 186 : if (nl1->sock == nl2->sock)
1753 186 : return true;
1754 :
1755 : return false;
1756 : }
1757 :
1758 : /* Exported interface function. This function simply calls
1759 : netlink_socket (). */
1760 3 : void kernel_init(struct zebra_ns *zns)
1761 : {
1762 3 : uint32_t groups, dplane_groups, ext_groups;
1763 : #if defined SOL_NETLINK
1764 3 : int one, ret;
1765 : #endif
1766 :
1767 : /*
1768 : * Initialize netlink sockets
1769 : *
1770 : * If RTMGRP_XXX exists use that, but at some point
1771 : * I think the kernel developers realized that
1772 : * keeping track of all the different values would
1773 : * lead to confusion, so we need to convert the
1774 : * RTNLGRP_XXX to a bit position for ourself
1775 : */
1776 3 : groups = RTMGRP_LINK |
1777 : RTMGRP_IPV4_ROUTE |
1778 : RTMGRP_IPV4_IFADDR |
1779 : RTMGRP_IPV6_ROUTE |
1780 : RTMGRP_IPV6_IFADDR |
1781 : RTMGRP_IPV4_MROUTE |
1782 : RTMGRP_NEIGH |
1783 : ((uint32_t) 1 << (RTNLGRP_IPV4_RULE - 1)) |
1784 : ((uint32_t) 1 << (RTNLGRP_IPV6_RULE - 1)) |
1785 : ((uint32_t) 1 << (RTNLGRP_NEXTHOP - 1)) |
1786 : ((uint32_t) 1 << (RTNLGRP_TC - 1));
1787 :
1788 3 : dplane_groups = (RTMGRP_LINK |
1789 : RTMGRP_IPV4_IFADDR |
1790 : RTMGRP_IPV6_IFADDR |
1791 : ((uint32_t) 1 << (RTNLGRP_IPV4_NETCONF - 1)) |
1792 : ((uint32_t) 1 << (RTNLGRP_IPV6_NETCONF - 1)) |
1793 : ((uint32_t) 1 << (RTNLGRP_MPLS_NETCONF - 1)));
1794 :
1795 : /* Use setsockopt for > 31 group */
1796 3 : ext_groups = RTNLGRP_TUNNEL;
1797 :
1798 3 : snprintf(zns->netlink.name, sizeof(zns->netlink.name),
1799 : "netlink-listen (NS %u)", zns->ns_id);
1800 3 : zns->netlink.sock = -1;
1801 3 : if (netlink_socket(&zns->netlink, groups, &ext_groups, 1, zns->ns_id) <
1802 : 0) {
1803 0 : zlog_err("Failure to create %s socket",
1804 : zns->netlink.name);
1805 0 : exit(-1);
1806 : }
1807 :
1808 3 : kernel_netlink_nlsock_insert(&zns->netlink);
1809 :
1810 3 : snprintf(zns->netlink_cmd.name, sizeof(zns->netlink_cmd.name),
1811 : "netlink-cmd (NS %u)", zns->ns_id);
1812 3 : zns->netlink_cmd.sock = -1;
1813 3 : if (netlink_socket(&zns->netlink_cmd, 0, 0, 0, zns->ns_id) < 0) {
1814 0 : zlog_err("Failure to create %s socket",
1815 : zns->netlink_cmd.name);
1816 0 : exit(-1);
1817 : }
1818 :
1819 3 : kernel_netlink_nlsock_insert(&zns->netlink_cmd);
1820 :
1821 : /* Outbound socket for dplane programming of the host OS. */
1822 3 : snprintf(zns->netlink_dplane_out.name,
1823 : sizeof(zns->netlink_dplane_out.name), "netlink-dp (NS %u)",
1824 : zns->ns_id);
1825 3 : zns->netlink_dplane_out.sock = -1;
1826 3 : if (netlink_socket(&zns->netlink_dplane_out, 0, 0, 0, zns->ns_id) < 0) {
1827 0 : zlog_err("Failure to create %s socket",
1828 : zns->netlink_dplane_out.name);
1829 0 : exit(-1);
1830 : }
1831 :
1832 3 : kernel_netlink_nlsock_insert(&zns->netlink_dplane_out);
1833 :
1834 : /* Inbound socket for OS events coming to the dplane. */
1835 3 : snprintf(zns->netlink_dplane_in.name,
1836 : sizeof(zns->netlink_dplane_in.name), "netlink-dp-in (NS %u)",
1837 : zns->ns_id);
1838 3 : zns->netlink_dplane_in.sock = -1;
1839 3 : if (netlink_socket(&zns->netlink_dplane_in, dplane_groups, 0, 0,
1840 : zns->ns_id) < 0) {
1841 0 : zlog_err("Failure to create %s socket",
1842 : zns->netlink_dplane_in.name);
1843 0 : exit(-1);
1844 : }
1845 :
1846 3 : kernel_netlink_nlsock_insert(&zns->netlink_dplane_in);
1847 :
1848 : /*
1849 : * SOL_NETLINK is not available on all platforms yet
1850 : * apparently. It's in bits/socket.h which I am not
1851 : * sure that we want to pull into our build system.
1852 : */
1853 : #if defined SOL_NETLINK
1854 : /*
1855 : * Let's tell the kernel that we want to receive extended
1856 : * ACKS over our command socket(s)
1857 : */
1858 3 : one = 1;
1859 3 : ret = setsockopt(zns->netlink_cmd.sock, SOL_NETLINK, NETLINK_EXT_ACK,
1860 : &one, sizeof(one));
1861 :
1862 3 : if (ret < 0)
1863 0 : zlog_notice("Registration for extended cmd ACK failed : %d %s",
1864 : errno, safe_strerror(errno));
1865 :
1866 3 : one = 1;
1867 3 : ret = setsockopt(zns->netlink_dplane_out.sock, SOL_NETLINK,
1868 : NETLINK_EXT_ACK, &one, sizeof(one));
1869 :
1870 3 : if (ret < 0)
1871 0 : zlog_notice("Registration for extended dp ACK failed : %d %s",
1872 : errno, safe_strerror(errno));
1873 :
1874 : /*
1875 : * Trim off the payload of the original netlink message in the
1876 : * acknowledgment. This option is available since Linux 4.2, so if
1877 : * setsockopt fails, ignore the error.
1878 : */
1879 3 : one = 1;
1880 3 : ret = setsockopt(zns->netlink_dplane_out.sock, SOL_NETLINK,
1881 : NETLINK_CAP_ACK, &one, sizeof(one));
1882 3 : if (ret < 0)
1883 0 : zlog_notice(
1884 : "Registration for reduced ACK packet size failed, probably running an early kernel");
1885 : #endif
1886 :
1887 : /* Register kernel socket. */
1888 3 : if (fcntl(zns->netlink.sock, F_SETFL, O_NONBLOCK) < 0)
1889 0 : flog_err_sys(EC_LIB_SOCKET, "Can't set %s socket flags: %s",
1890 : zns->netlink.name, safe_strerror(errno));
1891 :
1892 3 : if (fcntl(zns->netlink_cmd.sock, F_SETFL, O_NONBLOCK) < 0)
1893 0 : zlog_err("Can't set %s socket error: %s(%d)",
1894 : zns->netlink_cmd.name, safe_strerror(errno), errno);
1895 :
1896 3 : if (fcntl(zns->netlink_dplane_out.sock, F_SETFL, O_NONBLOCK) < 0)
1897 0 : zlog_err("Can't set %s socket error: %s(%d)",
1898 : zns->netlink_dplane_out.name, safe_strerror(errno),
1899 : errno);
1900 :
1901 3 : if (fcntl(zns->netlink_dplane_in.sock, F_SETFL, O_NONBLOCK) < 0)
1902 0 : zlog_err("Can't set %s socket error: %s(%d)",
1903 : zns->netlink_dplane_in.name, safe_strerror(errno),
1904 : errno);
1905 :
1906 : /* Set receive buffer size if it's set from command line */
1907 3 : if (rcvbufsize) {
1908 3 : netlink_recvbuf(&zns->netlink, rcvbufsize);
1909 3 : netlink_recvbuf(&zns->netlink_cmd, rcvbufsize);
1910 3 : netlink_recvbuf(&zns->netlink_dplane_out, rcvbufsize);
1911 3 : netlink_recvbuf(&zns->netlink_dplane_in, rcvbufsize);
1912 : }
1913 :
1914 : /* Set filter for inbound sockets, to exclude events we've generated
1915 : * ourselves.
1916 : */
1917 3 : netlink_install_filter(zns->netlink.sock, zns->netlink_cmd.snl.nl_pid,
1918 : zns->netlink_dplane_out.snl.nl_pid);
1919 :
1920 3 : netlink_install_filter(zns->netlink_dplane_in.sock,
1921 : zns->netlink_cmd.snl.nl_pid,
1922 : zns->netlink_dplane_out.snl.nl_pid);
1923 :
1924 3 : zns->t_netlink = NULL;
1925 :
1926 3 : thread_add_read(zrouter.master, kernel_read, zns,
1927 : zns->netlink.sock, &zns->t_netlink);
1928 :
1929 3 : rt_netlink_init();
1930 3 : }
1931 :
1932 : /* Helper to clean up an nlsock */
1933 21 : static void kernel_nlsock_fini(struct nlsock *nls)
1934 : {
1935 21 : if (nls && nls->sock >= 0) {
1936 12 : kernel_netlink_nlsock_remove(nls);
1937 12 : close(nls->sock);
1938 12 : nls->sock = -1;
1939 12 : XFREE(MTYPE_NL_BUF, nls->buf);
1940 12 : nls->buflen = 0;
1941 : }
1942 21 : }
1943 :
1944 6 : void kernel_terminate(struct zebra_ns *zns, bool complete)
1945 : {
1946 6 : THREAD_OFF(zns->t_netlink);
1947 :
1948 6 : kernel_nlsock_fini(&zns->netlink);
1949 :
1950 6 : kernel_nlsock_fini(&zns->netlink_cmd);
1951 :
1952 6 : kernel_nlsock_fini(&zns->netlink_dplane_in);
1953 :
1954 : /* During zebra shutdown, we need to leave the dataplane socket
1955 : * around until all work is done.
1956 : */
1957 6 : if (complete)
1958 3 : kernel_nlsock_fini(&zns->netlink_dplane_out);
1959 6 : }
1960 :
1961 : /*
1962 : * Global init for platform-/OS-specific things
1963 : */
1964 3 : void kernel_router_init(void)
1965 : {
1966 : /* Init nlsock hash and lock */
1967 3 : pthread_mutex_init(&nlsock_mutex, NULL);
1968 3 : nlsock_hash = hash_create_size(8, kernel_netlink_nlsock_key,
1969 : kernel_netlink_nlsock_hash_equal,
1970 : "Netlink Socket Hash");
1971 3 : }
1972 :
1973 : /*
1974 : * Global deinit for platform-/OS-specific things
1975 : */
1976 3 : void kernel_router_terminate(void)
1977 : {
1978 3 : pthread_mutex_destroy(&nlsock_mutex);
1979 :
1980 3 : hash_free(nlsock_hash);
1981 3 : nlsock_hash = NULL;
1982 3 : }
1983 :
1984 : #endif /* HAVE_NETLINK */
|