Line data Source code
1 : /* BGP network related functions
2 : * Copyright (C) 1999 Kunihiro Ishiguro
3 : *
4 : * This file is part of GNU Zebra.
5 : *
6 : * GNU Zebra is free software; you can redistribute it and/or modify it
7 : * under the terms of the GNU General Public License as published by the
8 : * Free Software Foundation; either version 2, or (at your option) any
9 : * later version.
10 : *
11 : * GNU Zebra is distributed in the hope that it will be useful, but
12 : * WITHOUT ANY WARRANTY; without even the implied warranty of
13 : * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 : * General Public License for more details.
15 : *
16 : * You should have received a copy of the GNU General Public License along
17 : * with this program; see the file COPYING; if not, write to the Free Software
18 : * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
19 : */
20 :
21 : #include <zebra.h>
22 :
23 : #include "thread.h"
24 : #include "sockunion.h"
25 : #include "sockopt.h"
26 : #include "memory.h"
27 : #include "log.h"
28 : #include "if.h"
29 : #include "prefix.h"
30 : #include "command.h"
31 : #include "privs.h"
32 : #include "linklist.h"
33 : #include "network.h"
34 : #include "queue.h"
35 : #include "hash.h"
36 : #include "filter.h"
37 : #include "ns.h"
38 : #include "lib_errors.h"
39 : #include "nexthop.h"
40 :
41 : #include "bgpd/bgpd.h"
42 : #include "bgpd/bgp_open.h"
43 : #include "bgpd/bgp_fsm.h"
44 : #include "bgpd/bgp_attr.h"
45 : #include "bgpd/bgp_debug.h"
46 : #include "bgpd/bgp_errors.h"
47 : #include "bgpd/bgp_network.h"
48 : #include "bgpd/bgp_zebra.h"
49 : #include "bgpd/bgp_nht.h"
50 :
51 : extern struct zebra_privs_t bgpd_privs;
52 :
53 : static char *bgp_get_bound_name(struct peer *peer);
54 :
55 0 : void bgp_dump_listener_info(struct vty *vty)
56 : {
57 0 : struct listnode *node;
58 0 : struct bgp_listener *listener;
59 :
60 0 : vty_out(vty, "Name fd Address\n");
61 0 : vty_out(vty, "---------------------------\n");
62 0 : for (ALL_LIST_ELEMENTS_RO(bm->listen_sockets, node, listener))
63 0 : vty_out(vty, "%-16s %d %pSU\n",
64 0 : listener->name ? listener->name : VRF_DEFAULT_NAME,
65 : listener->fd, &listener->su);
66 0 : }
67 :
68 : /*
69 : * Set MD5 key for the socket, for the given IPv4 peer address.
70 : * If the password is NULL or zero-length, the option will be disabled.
71 : */
72 0 : static int bgp_md5_set_socket(int socket, union sockunion *su,
73 : uint16_t prefixlen, const char *password)
74 : {
75 0 : int ret = -1;
76 0 : int en = ENOSYS;
77 : #if HAVE_DECL_TCP_MD5SIG
78 0 : union sockunion su2;
79 : #endif /* HAVE_TCP_MD5SIG */
80 :
81 0 : assert(socket >= 0);
82 :
83 : #if HAVE_DECL_TCP_MD5SIG
84 : /* Ensure there is no extraneous port information. */
85 0 : memcpy(&su2, su, sizeof(union sockunion));
86 0 : if (su2.sa.sa_family == AF_INET)
87 0 : su2.sin.sin_port = 0;
88 : else
89 0 : su2.sin6.sin6_port = 0;
90 :
91 : /* For addresses, use the non-extended signature functionality */
92 0 : if ((su2.sa.sa_family == AF_INET && prefixlen == IPV4_MAX_BITLEN)
93 0 : || (su2.sa.sa_family == AF_INET6 && prefixlen == IPV6_MAX_BITLEN))
94 0 : ret = sockopt_tcp_signature(socket, &su2, password);
95 : else
96 0 : ret = sockopt_tcp_signature_ext(socket, &su2, prefixlen,
97 : password);
98 0 : en = errno;
99 : #endif /* HAVE_TCP_MD5SIG */
100 :
101 0 : if (ret < 0) {
102 0 : switch (ret) {
103 0 : case -2:
104 0 : flog_warn(
105 : EC_BGP_NO_TCP_MD5,
106 : "Unable to set TCP MD5 option on socket for peer %pSU (sock=%d): This platform does not support MD5 auth for prefixes",
107 : su, socket);
108 0 : break;
109 0 : default:
110 0 : flog_warn(
111 : EC_BGP_NO_TCP_MD5,
112 : "Unable to set TCP MD5 option on socket for peer %pSU (sock=%d): %s",
113 : su, socket, safe_strerror(en));
114 : }
115 : }
116 :
117 0 : return ret;
118 : }
119 :
/*
 * Helper for bgp_connect(): apply the MD5 key to an outbound socket with
 * privileges raised.  Returns bgp_md5_set_socket()'s result, or -1 when
 * the build has no TCP_MD5SIG support (nothing is attempted then).
 */
static int bgp_md5_set_connect(int socket, union sockunion *su,
			       uint16_t prefixlen, const char *password)
{
	int rc = -1;

#if HAVE_DECL_TCP_MD5SIG
	frr_with_privs(&bgpd_privs) {
		rc = bgp_md5_set_socket(socket, su, prefixlen, password);
	}
#endif /* HAVE_DECL_TCP_MD5SIG */

	return rc;
}
134 :
135 0 : static int bgp_md5_set_password(struct peer *peer, const char *password)
136 : {
137 0 : struct listnode *node;
138 0 : int ret = 0;
139 0 : struct bgp_listener *listener;
140 :
141 : /*
142 : * Set or unset the password on the listen socket(s). Outbound
143 : * connections are taken care of in bgp_connect() below.
144 : */
145 0 : frr_with_privs(&bgpd_privs) {
146 0 : for (ALL_LIST_ELEMENTS_RO(bm->listen_sockets, node, listener))
147 0 : if (listener->su.sa.sa_family ==
148 0 : peer->su.sa.sa_family) {
149 0 : uint16_t prefixlen =
150 : peer->su.sa.sa_family == AF_INET
151 : ? IPV4_MAX_BITLEN
152 : : IPV6_MAX_BITLEN;
153 :
154 : /*
155 : * if we have stored a BGP vrf instance in the
156 : * listener it must match the bgp instance in
157 : * the peer otherwise the peer bgp instance
158 : * must be the default vrf or a view instance
159 : */
160 0 : if (!listener->bgp) {
161 0 : if (peer->bgp->vrf_id != VRF_DEFAULT)
162 0 : continue;
163 0 : } else if (listener->bgp != peer->bgp)
164 0 : continue;
165 :
166 0 : ret = bgp_md5_set_socket(listener->fd,
167 : &peer->su, prefixlen,
168 : password);
169 0 : break;
170 : }
171 : }
172 0 : return ret;
173 : }
174 :
175 0 : int bgp_md5_set_prefix(struct bgp *bgp, struct prefix *p, const char *password)
176 : {
177 0 : int ret = 0;
178 0 : union sockunion su;
179 0 : struct listnode *node;
180 0 : struct bgp_listener *listener;
181 :
182 : /* Set or unset the password on the listen socket(s). */
183 0 : frr_with_privs(&bgpd_privs) {
184 0 : for (ALL_LIST_ELEMENTS_RO(bm->listen_sockets, node, listener))
185 0 : if (listener->su.sa.sa_family == p->family
186 0 : && ((bgp->vrf_id == VRF_DEFAULT)
187 0 : || (listener->bgp == bgp))) {
188 0 : prefix2sockunion(p, &su);
189 0 : ret = bgp_md5_set_socket(listener->fd, &su,
190 0 : p->prefixlen,
191 : password);
192 0 : break;
193 : }
194 : }
195 :
196 0 : return ret;
197 : }
198 :
/* Remove the MD5 password for a prefix: a set with a NULL password. */
int bgp_md5_unset_prefix(struct bgp *bgp, struct prefix *p)
{
	const char *no_password = NULL;

	return bgp_md5_set_prefix(bgp, p, no_password);
}
203 :
204 0 : int bgp_md5_set(struct peer *peer)
205 : {
206 : /* Set the password from listen socket. */
207 0 : return bgp_md5_set_password(peer, peer->password);
208 : }
209 :
210 72 : static void bgp_update_setsockopt_tcp_keepalive(struct bgp *bgp, int fd)
211 : {
212 72 : if (!bgp)
213 : return;
214 72 : if (bgp->tcp_keepalive_idle != 0) {
215 0 : int ret;
216 :
217 0 : ret = setsockopt_tcp_keepalive(fd, bgp->tcp_keepalive_idle,
218 0 : bgp->tcp_keepalive_intvl,
219 0 : bgp->tcp_keepalive_probes);
220 0 : if (ret < 0)
221 0 : zlog_err(
222 : "Can't set TCP keepalive on socket %d, idle %u intvl %u probes %u",
223 : fd, bgp->tcp_keepalive_idle,
224 : bgp->tcp_keepalive_intvl,
225 : bgp->tcp_keepalive_probes);
226 : }
227 : }
228 :
/* Clear the peer's MD5 password on the matching listen socket. */
int bgp_md5_unset(struct peer *peer)
{
	const char *no_password = NULL;

	return bgp_md5_set_password(peer, no_password);
}
234 :
235 70 : int bgp_set_socket_ttl(struct peer *peer, int bgp_sock)
236 : {
237 70 : int ret = 0;
238 :
239 70 : if (!peer->gtsm_hops) {
240 70 : ret = sockopt_ttl(peer->su.sa.sa_family, bgp_sock, peer->ttl);
241 70 : if (ret) {
242 0 : flog_err(
243 : EC_LIB_SOCKET,
244 : "%s: Can't set TxTTL on peer (rtrid %pI4) socket, err = %d",
245 : __func__, &peer->remote_id, errno);
246 0 : return ret;
247 : }
248 : } else {
249 : /* On Linux, setting minttl without setting ttl seems to mess
250 : with the
251 : outgoing ttl. Therefore setting both.
252 : */
253 0 : ret = sockopt_ttl(peer->su.sa.sa_family, bgp_sock, MAXTTL);
254 0 : if (ret) {
255 0 : flog_err(
256 : EC_LIB_SOCKET,
257 : "%s: Can't set TxTTL on peer (rtrid %pI4) socket, err = %d",
258 : __func__, &peer->remote_id, errno);
259 0 : return ret;
260 : }
261 0 : ret = sockopt_minttl(peer->su.sa.sa_family, bgp_sock,
262 0 : MAXTTL + 1 - peer->gtsm_hops);
263 0 : if (ret) {
264 0 : flog_err(
265 : EC_LIB_SOCKET,
266 : "%s: Can't set MinTTL on peer (rtrid %pI4) socket, err = %d",
267 : __func__, &peer->remote_id, errno);
268 0 : return ret;
269 : }
270 : }
271 :
272 : return ret;
273 : }
274 :
/*
 * Work out which BGP instance an accepted connection belongs to.  More
 * than one VRF may use the same IP address space, so the instance is
 * derived from the device the socket is bound to: either a VRF device or
 * an interface, which in turn identifies the VRF.
 *
 * sock     - accepted connection.
 * bgp_inst - receives the instance.
 *
 * Returns 0 on success and non-zero on failure.  Note that in the
 * SO_BINDTODEVICE branch an empty device name yields 0 with *bgp_inst set
 * to whatever bgp_get_default() returns, without a NULL check.
 */
static int bgp_get_instance_for_inc_conn(int sock, struct bgp **bgp_inst)
{
#ifndef SO_BINDTODEVICE
	/* only Linux has SO_BINDTODEVICE, but we're in Linux-specific code
	 * here anyway since the assumption is that the interface name
	 * returned by getsockopt() is useful in identifying the VRF,
	 * particularly with Linux's VRF l3master device.  The whole mechanism
	 * is specific to Linux, so... when other platforms add VRF support,
	 * this will need handling here as well. (or, some restructuring) */
	*bgp_inst = bgp_get_default();
	return !*bgp_inst;

#else
	char devname[VRF_NAMSIZ + 1];
	socklen_t devname_len = VRF_NAMSIZ;
	struct listnode *cur, *next;
	struct bgp *inst;
	int rc;

	*bgp_inst = NULL;
	devname[0] = '\0';
	rc = getsockopt(sock, SOL_SOCKET, SO_BINDTODEVICE, devname,
			&devname_len);
	if (rc != 0) {
#if defined(HAVE_CUMULUS)
		flog_err(EC_LIB_SOCKET,
			 "[Error] BGP SO_BINDTODEVICE get failed (%s), sock %d",
			 safe_strerror(errno), sock);
		return -1;
#endif
	}

	/* No bound device: default instance. */
	if (!strlen(devname)) {
		*bgp_inst = bgp_get_default();
		return 0;
	}

	/* First try the name as an instance name ... */
	inst = bgp_lookup_by_name(devname);
	if (inst) {
		if (!inst->vrf_id) /* unexpected */
			return -1;
		*bgp_inst = inst;
		return 0;
	}

	/* ... then as an interface name in each non-view instance.
	 * TODO - This will be optimized once interfaces move into the NS */
	for (ALL_LIST_ELEMENTS(bm->bgp, cur, next, inst)) {
		if (inst->inst_type == BGP_INSTANCE_TYPE_VIEW)
			continue;

		if (if_lookup_by_name(devname, inst->vrf_id) != NULL) {
			*bgp_inst = inst;
			return 0;
		}
	}

	/* Neither an instance nor an interface matched. */
	return -1;
#endif
}
346 :
347 72 : static void bgp_socket_set_buffer_size(const int fd)
348 : {
349 72 : if (getsockopt_so_sendbuf(fd) < (int)bm->socket_buffer)
350 36 : setsockopt_so_sendbuf(fd, bm->socket_buffer);
351 72 : if (getsockopt_so_recvbuf(fd) < (int)bm->socket_buffer)
352 0 : setsockopt_so_recvbuf(fd, bm->socket_buffer);
353 72 : }
354 :
355 : /* Accept bgp connection. */
356 36 : static void bgp_accept(struct thread *thread)
357 : {
358 36 : int bgp_sock;
359 36 : int accept_sock;
360 36 : union sockunion su;
361 36 : struct bgp_listener *listener = THREAD_ARG(thread);
362 36 : struct peer *peer;
363 36 : struct peer *peer1;
364 36 : char buf[SU_ADDRSTRLEN];
365 36 : struct bgp *bgp = NULL;
366 :
367 36 : sockunion_init(&su);
368 :
369 36 : bgp = bgp_lookup_by_name(listener->name);
370 :
371 : /* Register accept thread. */
372 36 : accept_sock = THREAD_FD(thread);
373 36 : if (accept_sock < 0) {
374 0 : flog_err_sys(EC_LIB_SOCKET,
375 : "[Error] BGP accept socket fd is negative: %d",
376 : accept_sock);
377 2 : return;
378 : }
379 :
380 36 : thread_add_read(bm->master, bgp_accept, listener, accept_sock,
381 : &listener->thread);
382 :
383 : /* Accept client connection. */
384 36 : bgp_sock = sockunion_accept(accept_sock, &su);
385 36 : int save_errno = errno;
386 36 : if (bgp_sock < 0) {
387 0 : if (save_errno == EINVAL) {
388 0 : struct vrf *vrf =
389 0 : bgp ? vrf_lookup_by_id(bgp->vrf_id) : NULL;
390 :
391 : /*
392 : * It appears that sometimes, when VRFs are deleted on
393 : * the system, it takes a little while for us to get
394 : * notified about that. In the meantime we endlessly
395 : * loop on accept(), because the socket, having been
396 : * bound to a now-deleted VRF device, is in some weird
397 : * state which causes accept() to fail.
398 : *
399 : * To avoid this, if we see accept() fail with EINVAL,
400 : * we cancel ourselves and trust that when the VRF
401 : * deletion notification comes in the event handler for
402 : * that will take care of cleaning us up.
403 : */
404 0 : flog_err_sys(
405 : EC_LIB_SOCKET,
406 : "[Error] accept() failed with error \"%s\" on BGP listener socket %d for BGP instance in VRF \"%s\"; refreshing socket",
407 : safe_strerror(save_errno), accept_sock,
408 : VRF_LOGNAME(vrf));
409 0 : THREAD_OFF(listener->thread);
410 : } else {
411 0 : flog_err_sys(
412 : EC_LIB_SOCKET,
413 : "[Error] BGP socket accept failed (%s); retrying",
414 : safe_strerror(save_errno));
415 : }
416 0 : return;
417 : }
418 36 : set_nonblocking(bgp_sock);
419 :
420 : /* Obtain BGP instance this connection is meant for.
421 : * - if it is a VRF netns sock, then BGP is in listener structure
422 : * - otherwise, the bgp instance need to be demultiplexed
423 : */
424 36 : if (listener->bgp)
425 0 : bgp = listener->bgp;
426 36 : else if (bgp_get_instance_for_inc_conn(bgp_sock, &bgp)) {
427 0 : if (bgp_debug_neighbor_events(NULL))
428 0 : zlog_debug(
429 : "[Event] Could not get instance for incoming conn from %s",
430 : inet_sutop(&su, buf));
431 0 : close(bgp_sock);
432 0 : return;
433 : }
434 :
435 36 : bgp_socket_set_buffer_size(bgp_sock);
436 :
437 : /* Set TCP keepalive when TCP keepalive is enabled */
438 36 : bgp_update_setsockopt_tcp_keepalive(bgp, bgp_sock);
439 :
440 : /* Check remote IP address */
441 36 : peer1 = peer_lookup(bgp, &su);
442 :
443 36 : if (!peer1) {
444 2 : peer1 = peer_lookup_dynamic_neighbor(bgp, &su);
445 2 : if (peer1) {
446 : /* Dynamic neighbor has been created, let it proceed */
447 0 : peer1->fd = bgp_sock;
448 :
449 : /* Set the user configured MSS to TCP socket */
450 0 : if (CHECK_FLAG(peer1->flags, PEER_FLAG_TCP_MSS))
451 0 : sockopt_tcp_mss_set(bgp_sock, peer1->tcp_mss);
452 :
453 0 : bgp_fsm_change_status(peer1, Active);
454 0 : THREAD_OFF(
455 : peer1->t_start); /* created in peer_create() */
456 :
457 0 : if (peer_active(peer1)) {
458 0 : if (CHECK_FLAG(peer1->flags,
459 : PEER_FLAG_TIMER_DELAYOPEN))
460 0 : BGP_EVENT_ADD(
461 : peer1,
462 : TCP_connection_open_w_delay);
463 : else
464 0 : BGP_EVENT_ADD(peer1,
465 : TCP_connection_open);
466 : }
467 :
468 0 : return;
469 : }
470 : }
471 :
472 36 : if (!peer1) {
473 2 : if (bgp_debug_neighbor_events(NULL)) {
474 0 : zlog_debug(
475 : "[Event] %s connection rejected(%s:%u:%s) - not configured and not valid for dynamic",
476 : inet_sutop(&su, buf), bgp->name_pretty, bgp->as,
477 : VRF_LOGNAME(vrf_lookup_by_id(bgp->vrf_id)));
478 : }
479 2 : close(bgp_sock);
480 2 : return;
481 : }
482 :
483 34 : if (CHECK_FLAG(peer1->flags, PEER_FLAG_SHUTDOWN)
484 34 : || CHECK_FLAG(peer1->bgp->flags, BGP_FLAG_SHUTDOWN)) {
485 0 : if (bgp_debug_neighbor_events(peer1))
486 0 : zlog_debug(
487 : "[Event] connection from %s rejected(%s:%u:%s) due to admin shutdown",
488 : inet_sutop(&su, buf), bgp->name_pretty, bgp->as,
489 : VRF_LOGNAME(vrf_lookup_by_id(bgp->vrf_id)));
490 0 : close(bgp_sock);
491 0 : return;
492 : }
493 :
494 : /*
495 : * Do not accept incoming connections in Clearing state. This can result
496 : * in incorect state transitions - e.g., the connection goes back to
497 : * Established and then the Clearing_Completed event is generated. Also,
498 : * block incoming connection in Deleted state.
499 : */
500 34 : if (peer1->status == Clearing || peer1->status == Deleted) {
501 0 : if (bgp_debug_neighbor_events(peer1))
502 0 : zlog_debug(
503 : "[Event] Closing incoming conn for %s (%p) state %d",
504 : peer1->host, peer1, peer1->status);
505 0 : close(bgp_sock);
506 0 : return;
507 : }
508 :
509 : /* Check that at least one AF is activated for the peer. */
510 34 : if (!peer_active(peer1)) {
511 0 : if (bgp_debug_neighbor_events(peer1))
512 0 : zlog_debug(
513 : "%s - incoming conn rejected - no AF activated for peer",
514 : peer1->host);
515 0 : close(bgp_sock);
516 0 : return;
517 : }
518 :
519 : /* Do not try to reconnect if the peer reached maximum
520 : * prefixes, restart timer is still running or the peer
521 : * is shutdown.
522 : */
523 34 : if (BGP_PEER_START_SUPPRESSED(peer1)) {
524 0 : if (bgp_debug_neighbor_events(peer1))
525 0 : zlog_debug(
526 : "[Event] Incoming BGP connection rejected from %s due to maximum-prefix or shutdown",
527 : peer1->host);
528 0 : close(bgp_sock);
529 0 : return;
530 : }
531 :
532 34 : if (bgp_debug_neighbor_events(peer1))
533 0 : zlog_debug(
534 : "[Event] connection from %s fd %d, active peer status %d fd %d",
535 : inet_sutop(&su, buf), bgp_sock, peer1->status,
536 : peer1->fd);
537 :
538 34 : if (peer1->doppelganger) {
539 : /* We have an existing connection. Kill the existing one and run
540 : with this one.
541 : */
542 0 : if (bgp_debug_neighbor_events(peer1))
543 0 : zlog_debug(
544 : "[Event] New active connection from peer %s, Killing previous active connection",
545 : peer1->host);
546 0 : peer_delete(peer1->doppelganger);
547 : }
548 :
549 34 : if (bgp_set_socket_ttl(peer1, bgp_sock) < 0)
550 0 : if (bgp_debug_neighbor_events(peer1))
551 0 : zlog_debug(
552 : "[Event] Unable to set min/max TTL on peer %s, Continuing",
553 : peer1->host);
554 :
555 34 : peer = peer_create(&su, peer1->conf_if, peer1->bgp, peer1->local_as,
556 : peer1->as, peer1->as_type, NULL, false);
557 :
558 34 : peer_xfer_config(peer, peer1);
559 34 : bgp_peer_gr_flags_update(peer);
560 :
561 150 : BGP_GR_ROUTER_DETECT_AND_SEND_CAPABILITY_TO_ZEBRA(peer->bgp,
562 : peer->bgp->peer);
563 :
564 34 : if (bgp_peer_gr_mode_get(peer) == PEER_DISABLE) {
565 :
566 0 : UNSET_FLAG(peer->sflags, PEER_STATUS_NSF_MODE);
567 :
568 0 : if (CHECK_FLAG(peer->sflags, PEER_STATUS_NSF_WAIT)) {
569 0 : peer_nsf_stop(peer);
570 : }
571 : }
572 :
573 34 : peer->doppelganger = peer1;
574 34 : peer1->doppelganger = peer;
575 34 : peer->fd = bgp_sock;
576 68 : frr_with_privs(&bgpd_privs) {
577 34 : vrf_bind(peer->bgp->vrf_id, bgp_sock, bgp_get_bound_name(peer));
578 : }
579 34 : bgp_peer_reg_with_nht(peer);
580 34 : bgp_fsm_change_status(peer, Active);
581 34 : THREAD_OFF(peer->t_start); /* created in peer_create() */
582 :
583 34 : SET_FLAG(peer->sflags, PEER_STATUS_ACCEPT_PEER);
584 : /* Make dummy peer until read Open packet. */
585 34 : if (peer_established(peer1)
586 0 : && CHECK_FLAG(peer1->sflags, PEER_STATUS_NSF_MODE)) {
587 : /* If we have an existing established connection with graceful
588 : * restart
589 : * capability announced with one or more address families, then
590 : * drop
591 : * existing established connection and move state to connect.
592 : */
593 0 : peer1->last_reset = PEER_DOWN_NSF_CLOSE_SESSION;
594 :
595 0 : if (CHECK_FLAG(peer1->flags, PEER_FLAG_GRACEFUL_RESTART)
596 0 : || CHECK_FLAG(peer1->flags,
597 : PEER_FLAG_GRACEFUL_RESTART_HELPER))
598 0 : SET_FLAG(peer1->sflags, PEER_STATUS_NSF_WAIT);
599 :
600 0 : bgp_event_update(peer1, TCP_connection_closed);
601 : }
602 :
603 34 : if (peer_active(peer)) {
604 34 : if (CHECK_FLAG(peer->flags, PEER_FLAG_TIMER_DELAYOPEN))
605 0 : BGP_EVENT_ADD(peer, TCP_connection_open_w_delay);
606 : else
607 34 : BGP_EVENT_ADD(peer, TCP_connection_open);
608 : }
609 :
610 : /*
611 : * If we are doing nht for a peer that is v6 LL based
612 : * massage the event system to make things happy
613 : */
614 34 : bgp_nht_interface_events(peer);
615 : }
616 :
617 : /* BGP socket bind. */
618 70 : static char *bgp_get_bound_name(struct peer *peer)
619 : {
620 70 : if (!peer)
621 : return NULL;
622 :
623 70 : if ((peer->bgp->vrf_id == VRF_DEFAULT) && !peer->ifname
624 70 : && !peer->conf_if)
625 : return NULL;
626 :
627 0 : if (peer->su.sa.sa_family != AF_INET
628 0 : && peer->su.sa.sa_family != AF_INET6)
629 : return NULL; // unexpected
630 :
631 : /* For IPv6 peering, interface (unnumbered or link-local with interface)
632 : * takes precedence over VRF. For IPv4 peering, explicit interface or
633 : * VRF are the situations to bind.
634 : */
635 0 : if (peer->su.sa.sa_family == AF_INET6 && peer->conf_if)
636 : return peer->conf_if;
637 :
638 0 : if (peer->ifname)
639 : return peer->ifname;
640 :
641 0 : if (peer->bgp->inst_type == BGP_INSTANCE_TYPE_VIEW)
642 : return NULL;
643 :
644 0 : return peer->bgp->name;
645 : }
646 :
647 0 : int bgp_update_address(struct interface *ifp, const union sockunion *dst,
648 : union sockunion *addr)
649 : {
650 0 : struct prefix *p, *sel, d;
651 0 : struct connected *connected;
652 0 : struct listnode *node;
653 0 : int common;
654 :
655 0 : if (!sockunion2hostprefix(dst, &d))
656 : return 1;
657 :
658 0 : sel = NULL;
659 0 : common = -1;
660 :
661 0 : for (ALL_LIST_ELEMENTS_RO(ifp->connected, node, connected)) {
662 0 : p = connected->address;
663 0 : if (p->family != d.family)
664 0 : continue;
665 0 : if (prefix_common_bits(p, &d) > common) {
666 0 : sel = p;
667 0 : common = prefix_common_bits(sel, &d);
668 : }
669 : }
670 :
671 0 : if (!sel)
672 : return 1;
673 :
674 0 : prefix2sockunion(sel, addr);
675 0 : return 0;
676 : }
677 :
678 : /* Update source selection. */
679 36 : static int bgp_update_source(struct peer *peer)
680 : {
681 36 : struct interface *ifp;
682 36 : union sockunion addr;
683 36 : int ret = 0;
684 :
685 36 : sockunion_init(&addr);
686 :
687 : /* Source is specified with interface name. */
688 36 : if (peer->update_if) {
689 0 : ifp = if_lookup_by_name(peer->update_if, peer->bgp->vrf_id);
690 0 : if (!ifp)
691 : return -1;
692 :
693 0 : if (bgp_update_address(ifp, &peer->su, &addr))
694 : return -1;
695 :
696 0 : ret = sockunion_bind(peer->fd, &addr, 0, &addr);
697 : }
698 :
699 : /* Source is specified with IP address. */
700 36 : if (peer->update_source)
701 0 : ret = sockunion_bind(peer->fd, peer->update_source, 0,
702 : peer->update_source);
703 :
704 : return ret;
705 : }
706 :
707 : /* BGP try to connect to the peer. */
708 36 : int bgp_connect(struct peer *peer)
709 : {
710 36 : assert(!CHECK_FLAG(peer->thread_flags, PEER_THREAD_WRITES_ON));
711 36 : assert(!CHECK_FLAG(peer->thread_flags, PEER_THREAD_READS_ON));
712 36 : ifindex_t ifindex = 0;
713 :
714 36 : if (peer->conf_if && BGP_PEER_SU_UNSPEC(peer)) {
715 0 : if (bgp_debug_neighbor_events(peer))
716 0 : zlog_debug("Peer address not learnt: Returning from connect");
717 0 : return 0;
718 : }
719 36 : frr_with_privs(&bgpd_privs) {
720 : /* Make socket for the peer. */
721 36 : peer->fd = vrf_sockunion_socket(&peer->su, peer->bgp->vrf_id,
722 36 : bgp_get_bound_name(peer));
723 : }
724 36 : if (peer->fd < 0) {
725 0 : peer->last_reset = PEER_DOWN_SOCKET_ERROR;
726 0 : if (bgp_debug_neighbor_events(peer))
727 0 : zlog_debug("%s: Failure to create socket for connection to %s, error received: %s(%d)",
728 : __func__, peer->host, safe_strerror(errno),
729 : errno);
730 0 : return -1;
731 : }
732 :
733 36 : set_nonblocking(peer->fd);
734 :
735 : /* Set the user configured MSS to TCP socket */
736 36 : if (CHECK_FLAG(peer->flags, PEER_FLAG_TCP_MSS))
737 0 : sockopt_tcp_mss_set(peer->fd, peer->tcp_mss);
738 :
739 36 : bgp_socket_set_buffer_size(peer->fd);
740 :
741 : /* Set TCP keepalive when TCP keepalive is enabled */
742 36 : bgp_update_setsockopt_tcp_keepalive(peer->bgp, peer->fd);
743 :
744 36 : if (bgp_set_socket_ttl(peer, peer->fd) < 0) {
745 0 : peer->last_reset = PEER_DOWN_SOCKET_ERROR;
746 0 : if (bgp_debug_neighbor_events(peer))
747 0 : zlog_debug("%s: Failure to set socket ttl for connection to %s, error received: %s(%d)",
748 : __func__, peer->host, safe_strerror(errno),
749 : errno);
750 :
751 0 : return -1;
752 : }
753 :
754 36 : sockopt_reuseaddr(peer->fd);
755 36 : sockopt_reuseport(peer->fd);
756 :
757 : #ifdef IPTOS_PREC_INTERNETCONTROL
758 72 : frr_with_privs(&bgpd_privs) {
759 36 : if (sockunion_family(&peer->su) == AF_INET)
760 35 : setsockopt_ipv4_tos(peer->fd, bm->tcp_dscp);
761 1 : else if (sockunion_family(&peer->su) == AF_INET6)
762 1 : setsockopt_ipv6_tclass(peer->fd, bm->tcp_dscp);
763 : }
764 : #endif
765 :
766 36 : if (peer->password) {
767 0 : uint16_t prefixlen = peer->su.sa.sa_family == AF_INET
768 : ? IPV4_MAX_BITLEN
769 : : IPV6_MAX_BITLEN;
770 :
771 0 : bgp_md5_set_connect(peer->fd, &peer->su, prefixlen,
772 : peer->password);
773 : }
774 :
775 : /* Update source bind. */
776 36 : if (bgp_update_source(peer) < 0) {
777 0 : peer->last_reset = PEER_DOWN_SOCKET_ERROR;
778 0 : return connect_error;
779 : }
780 :
781 36 : if (peer->conf_if || peer->ifname)
782 0 : ifindex = ifname2ifindex(peer->conf_if ? peer->conf_if
783 : : peer->ifname,
784 0 : peer->bgp->vrf_id);
785 :
786 36 : if (bgp_debug_neighbor_events(peer))
787 0 : zlog_debug("%s [Event] Connect start to %s fd %d", peer->host,
788 : peer->host, peer->fd);
789 :
790 : /* Connect to the remote peer. */
791 36 : return sockunion_connect(peer->fd, &peer->su, htons(peer->port),
792 : ifindex);
793 : }
794 :
795 : /* After TCP connection is established. Get local address and port. */
796 141 : int bgp_getsockname(struct peer *peer)
797 : {
798 141 : if (peer->su_local) {
799 56 : sockunion_free(peer->su_local);
800 56 : peer->su_local = NULL;
801 : }
802 :
803 141 : if (peer->su_remote) {
804 56 : sockunion_free(peer->su_remote);
805 56 : peer->su_remote = NULL;
806 : }
807 :
808 141 : peer->su_local = sockunion_getsockname(peer->fd);
809 141 : if (!peer->su_local)
810 : return -1;
811 141 : peer->su_remote = sockunion_getpeername(peer->fd);
812 141 : if (!peer->su_remote)
813 : return -1;
814 :
815 141 : if (!bgp_zebra_nexthop_set(peer->su_local, peer->su_remote,
816 : &peer->nexthop, peer)) {
817 0 : flog_err(EC_BGP_NH_UPD,
818 : "%s: nexthop_set failed, resetting connection - intf %p",
819 : peer->host, peer->nexthop.ifp);
820 0 : return -1;
821 : }
822 : return 0;
823 : }
824 :
825 :
826 90 : static int bgp_listener(int sock, struct sockaddr *sa, socklen_t salen,
827 : struct bgp *bgp)
828 : {
829 90 : struct bgp_listener *listener;
830 90 : int ret, en;
831 :
832 90 : sockopt_reuseaddr(sock);
833 90 : sockopt_reuseport(sock);
834 :
835 180 : frr_with_privs(&bgpd_privs) {
836 :
837 : #ifdef IPTOS_PREC_INTERNETCONTROL
838 90 : if (sa->sa_family == AF_INET)
839 45 : setsockopt_ipv4_tos(sock, bm->tcp_dscp);
840 45 : else if (sa->sa_family == AF_INET6)
841 45 : setsockopt_ipv6_tclass(sock, bm->tcp_dscp);
842 : #endif
843 :
844 90 : sockopt_v6only(sa->sa_family, sock);
845 :
846 90 : ret = bind(sock, sa, salen);
847 90 : en = errno;
848 : }
849 :
850 90 : if (ret < 0) {
851 0 : flog_err_sys(EC_LIB_SOCKET, "bind: %s", safe_strerror(en));
852 0 : return ret;
853 : }
854 :
855 90 : ret = listen(sock, SOMAXCONN);
856 90 : if (ret < 0) {
857 0 : flog_err_sys(EC_LIB_SOCKET, "listen: %s", safe_strerror(errno));
858 0 : return ret;
859 : }
860 :
861 90 : listener = XCALLOC(MTYPE_BGP_LISTENER, sizeof(*listener));
862 90 : listener->fd = sock;
863 90 : listener->name = XSTRDUP(MTYPE_BGP_LISTENER, bgp->name);
864 :
865 : /* this socket is in a vrf record bgp back pointer */
866 90 : if (bgp->vrf_id != VRF_DEFAULT)
867 0 : listener->bgp = bgp;
868 :
869 90 : memcpy(&listener->su, sa, salen);
870 90 : thread_add_read(bm->master, bgp_accept, listener, sock,
871 : &listener->thread);
872 90 : listnode_add(bm->listen_sockets, listener);
873 :
874 90 : return 0;
875 : }
876 :
877 : /* IPv6 supported version of BGP server socket setup. */
878 45 : int bgp_socket(struct bgp *bgp, unsigned short port, const char *address)
879 : {
880 45 : struct addrinfo *ainfo;
881 45 : struct addrinfo *ainfo_save;
882 45 : static const struct addrinfo req = {
883 : .ai_family = AF_UNSPEC,
884 : .ai_flags = AI_PASSIVE,
885 : .ai_socktype = SOCK_STREAM,
886 : };
887 45 : int ret, count;
888 45 : char port_str[BUFSIZ];
889 :
890 45 : snprintf(port_str, sizeof(port_str), "%d", port);
891 45 : port_str[sizeof(port_str) - 1] = '\0';
892 :
893 45 : frr_with_privs(&bgpd_privs) {
894 45 : ret = vrf_getaddrinfo(address, port_str, &req, &ainfo_save,
895 : bgp->vrf_id);
896 : }
897 45 : if (ret != 0) {
898 0 : flog_err_sys(EC_LIB_SOCKET, "getaddrinfo: %s",
899 : gai_strerror(ret));
900 0 : return -1;
901 : }
902 45 : if (bgp_option_check(BGP_OPT_NO_ZEBRA) &&
903 0 : bgp->vrf_id != VRF_DEFAULT) {
904 0 : freeaddrinfo(ainfo_save);
905 0 : return -1;
906 : }
907 45 : count = 0;
908 135 : for (ainfo = ainfo_save; ainfo; ainfo = ainfo->ai_next) {
909 90 : int sock;
910 :
911 90 : if (ainfo->ai_family != AF_INET && ainfo->ai_family != AF_INET6)
912 0 : continue;
913 :
914 90 : frr_with_privs(&bgpd_privs) {
915 90 : sock = vrf_socket(ainfo->ai_family,
916 : ainfo->ai_socktype,
917 : ainfo->ai_protocol,
918 : bgp->vrf_id,
919 90 : (bgp->inst_type
920 : == BGP_INSTANCE_TYPE_VRF
921 : ? bgp->name : NULL));
922 : }
923 90 : if (sock < 0) {
924 0 : flog_err_sys(EC_LIB_SOCKET, "socket: %s",
925 : safe_strerror(errno));
926 0 : continue;
927 : }
928 :
929 : /* if we intend to implement ttl-security, this socket needs
930 : * ttl=255 */
931 90 : sockopt_ttl(ainfo->ai_family, sock, MAXTTL);
932 :
933 90 : ret = bgp_listener(sock, ainfo->ai_addr, ainfo->ai_addrlen,
934 : bgp);
935 90 : if (ret == 0)
936 90 : ++count;
937 : else
938 0 : close(sock);
939 : }
940 45 : freeaddrinfo(ainfo_save);
941 45 : if (count == 0 && bgp->inst_type != BGP_INSTANCE_TYPE_VRF) {
942 0 : flog_err(
943 : EC_LIB_SOCKET,
944 : "%s: no usable addresses please check other programs usage of specified port %d",
945 : __func__, port);
946 0 : flog_err_sys(EC_LIB_SOCKET, "%s: Program cannot continue",
947 : __func__);
948 0 : exit(-1);
949 : }
950 :
951 : return 0;
952 : }
953 :
954 : /* this function closes vrf socket
955 : * this should be called only for vrf socket with netns backend
956 : */
957 45 : void bgp_close_vrf_socket(struct bgp *bgp)
958 : {
959 45 : struct listnode *node, *next;
960 45 : struct bgp_listener *listener;
961 :
962 45 : if (!bgp)
963 : return;
964 :
965 45 : if (bm->listen_sockets == NULL)
966 : return;
967 :
968 45 : for (ALL_LIST_ELEMENTS(bm->listen_sockets, node, next, listener)) {
969 0 : if (listener->bgp == bgp) {
970 0 : THREAD_OFF(listener->thread);
971 0 : close(listener->fd);
972 0 : listnode_delete(bm->listen_sockets, listener);
973 0 : XFREE(MTYPE_BGP_LISTENER, listener->name);
974 0 : XFREE(MTYPE_BGP_LISTENER, listener);
975 : }
976 : }
977 : }
978 :
979 : /* this function closes main socket
980 : */
981 141 : void bgp_close(void)
982 : {
983 141 : struct listnode *node, *next;
984 141 : struct bgp_listener *listener;
985 :
986 141 : if (bm->listen_sockets == NULL)
987 : return;
988 :
989 138 : for (ALL_LIST_ELEMENTS(bm->listen_sockets, node, next, listener)) {
990 90 : if (listener->bgp)
991 0 : continue;
992 90 : THREAD_OFF(listener->thread);
993 90 : close(listener->fd);
994 90 : listnode_delete(bm->listen_sockets, listener);
995 90 : XFREE(MTYPE_BGP_LISTENER, listener->name);
996 90 : XFREE(MTYPE_BGP_LISTENER, listener);
997 : }
998 : }
|