Line data Source code
1 : /* BGP Nexthop tracking
2 : * Copyright (C) 2013 Cumulus Networks, Inc.
3 : *
4 : * This file is part of GNU Zebra.
5 : *
6 : * GNU Zebra is free software; you can redistribute it and/or modify it
7 : * under the terms of the GNU General Public License as published by the
8 : * Free Software Foundation; either version 2, or (at your option) any
9 : * later version.
10 : *
11 : * GNU Zebra is distributed in the hope that it will be useful, but
12 : * WITHOUT ANY WARRANTY; without even the implied warranty of
13 : * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 : * General Public License for more details.
15 : *
16 : * You should have received a copy of the GNU General Public License along
17 : * with this program; see the file COPYING; if not, write to the Free Software
18 : * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
19 : */
20 :
21 : #include <zebra.h>
22 :
23 : #include "command.h"
24 : #include "thread.h"
25 : #include "prefix.h"
26 : #include "zclient.h"
27 : #include "stream.h"
28 : #include "network.h"
29 : #include "log.h"
30 : #include "memory.h"
31 : #include "nexthop.h"
32 : #include "vrf.h"
33 : #include "filter.h"
34 : #include "nexthop_group.h"
35 :
36 : #include "bgpd/bgpd.h"
37 : #include "bgpd/bgp_table.h"
38 : #include "bgpd/bgp_route.h"
39 : #include "bgpd/bgp_attr.h"
40 : #include "bgpd/bgp_nexthop.h"
41 : #include "bgpd/bgp_debug.h"
42 : #include "bgpd/bgp_errors.h"
43 : #include "bgpd/bgp_nht.h"
44 : #include "bgpd/bgp_fsm.h"
45 : #include "bgpd/bgp_zebra.h"
46 : #include "bgpd/bgp_flowspec_util.h"
47 : #include "bgpd/bgp_evpn.h"
48 : #include "bgpd/bgp_rd.h"
49 :
50 : extern struct zclient *zclient;
51 :
52 : static void register_zebra_rnh(struct bgp_nexthop_cache *bnc);
53 : static void unregister_zebra_rnh(struct bgp_nexthop_cache *bnc);
54 : static int make_prefix(int afi, struct bgp_path_info *pi, struct prefix *p);
55 : static void bgp_nht_ifp_initial(struct thread *thread);
56 :
57 11 : static int bgp_isvalid_nexthop(struct bgp_nexthop_cache *bnc)
58 : {
59 11 : return (bgp_zebra_num_connects() == 0
60 11 : || (bnc && CHECK_FLAG(bnc->flags, BGP_NEXTHOP_VALID)
61 9 : && bnc->nexthop_num > 0));
62 : }
63 :
64 0 : static int bgp_isvalid_nexthop_for_ebgp(struct bgp_nexthop_cache *bnc,
65 : struct bgp_path_info *path)
66 : {
67 0 : struct interface *ifp = NULL;
68 0 : struct nexthop *nexthop;
69 0 : struct bgp_interface *iifp;
70 0 : struct peer *peer;
71 :
72 0 : if (!path->extra || !path->extra->peer_orig)
73 : return false;
74 :
75 0 : peer = path->extra->peer_orig;
76 :
77 : /* only connected ebgp peers are valid */
78 0 : if (peer->sort != BGP_PEER_EBGP || peer->ttl != BGP_DEFAULT_TTL ||
79 0 : CHECK_FLAG(peer->flags, PEER_FLAG_DISABLE_CONNECTED_CHECK) ||
80 0 : CHECK_FLAG(peer->bgp->flags, BGP_FLAG_DISABLE_NH_CONNECTED_CHK))
81 : return false;
82 :
83 0 : for (nexthop = bnc->nexthop; nexthop; nexthop = nexthop->next) {
84 0 : if (nexthop->type == NEXTHOP_TYPE_IFINDEX ||
85 0 : nexthop->type == NEXTHOP_TYPE_IPV4_IFINDEX ||
86 : nexthop->type == NEXTHOP_TYPE_IPV6_IFINDEX) {
87 0 : ifp = if_lookup_by_index(
88 0 : bnc->ifindex ? bnc->ifindex : nexthop->ifindex,
89 0 : bnc->bgp->vrf_id);
90 : }
91 0 : if (!ifp)
92 0 : continue;
93 0 : iifp = ifp->info;
94 0 : if (CHECK_FLAG(iifp->flags, BGP_INTERFACE_MPLS_BGP_FORWARDING))
95 : return true;
96 : }
97 : return false;
98 : }
99 :
100 0 : static int bgp_isvalid_nexthop_for_mplsovergre(struct bgp_nexthop_cache *bnc,
101 : struct bgp_path_info *path)
102 : {
103 0 : struct interface *ifp = NULL;
104 0 : struct nexthop *nexthop;
105 :
106 0 : for (nexthop = bnc->nexthop; nexthop; nexthop = nexthop->next) {
107 0 : if (nexthop->type != NEXTHOP_TYPE_BLACKHOLE) {
108 0 : ifp = if_lookup_by_index(
109 0 : bnc->ifindex ? bnc->ifindex : nexthop->ifindex,
110 0 : bnc->bgp->vrf_id);
111 0 : if (ifp && (ifp->ll_type == ZEBRA_LLT_IPGRE ||
112 : ifp->ll_type == ZEBRA_LLT_IP6GRE))
113 : break;
114 : }
115 : }
116 0 : if (!ifp)
117 : return false;
118 :
119 0 : if (CHECK_FLAG(path->attr->rmap_change_flags,
120 : BATTR_RMAP_L3VPN_ACCEPT_GRE))
121 0 : return true;
122 :
123 : return false;
124 : }
125 :
126 0 : static int bgp_isvalid_nexthop_for_mpls(struct bgp_nexthop_cache *bnc,
127 : struct bgp_path_info *path)
128 : {
129 : /*
130 : * - In the case of MPLS-VPN, the label is learned from LDP or other
131 : * protocols, and nexthop tracking is enabled for the label.
132 : * The value is recorded as BGP_NEXTHOP_LABELED_VALID.
133 : * - In the case of SRv6-VPN, we need to track the reachability to the
134 : * SID (in other words, IPv6 address). As in MPLS, we need to record
135 : * the value as BGP_NEXTHOP_SID_VALID. However, this function is
136 : * currently not implemented, and this function assumes that all
137 : * Transit routes for SRv6-VPN are valid.
138 : * - Otherwise check for mpls-gre acceptance
139 : */
140 0 : return (bgp_zebra_num_connects() == 0 ||
141 0 : (bnc && (bnc->nexthop_num > 0 &&
142 0 : (CHECK_FLAG(path->flags, BGP_PATH_ACCEPT_OWN) ||
143 0 : CHECK_FLAG(bnc->flags, BGP_NEXTHOP_LABELED_VALID) ||
144 0 : bnc->bgp->srv6_enabled ||
145 0 : bgp_isvalid_nexthop_for_ebgp(bnc, path) ||
146 0 : bgp_isvalid_nexthop_for_mplsovergre(bnc, path)))));
147 : }
148 :
149 4 : static void bgp_unlink_nexthop_check(struct bgp_nexthop_cache *bnc)
150 : {
151 4 : if (LIST_EMPTY(&(bnc->paths)) && !bnc->nht_info) {
152 2 : if (BGP_DEBUG(nht, NHT))
153 0 : zlog_debug("%s: freeing bnc %pFX(%d)(%u)(%s)", __func__,
154 : &bnc->prefix, bnc->ifindex, bnc->srte_color,
155 : bnc->bgp->name_pretty);
156 : /* only unregister if this is the last nh for this prefix*/
157 2 : if (!bnc_existing_for_prefix(bnc))
158 2 : unregister_zebra_rnh(bnc);
159 2 : bnc_free(bnc);
160 : }
161 4 : }
162 :
163 6 : void bgp_unlink_nexthop(struct bgp_path_info *path)
164 : {
165 6 : struct bgp_nexthop_cache *bnc = path->nexthop;
166 :
167 6 : if (!bnc)
168 : return;
169 :
170 2 : path_nh_map(path, NULL, false);
171 :
172 2 : bgp_unlink_nexthop_check(bnc);
173 : }
174 :
175 1 : void bgp_replace_nexthop_by_peer(struct peer *from, struct peer *to)
176 : {
177 1 : struct prefix pp;
178 1 : struct prefix pt;
179 1 : struct bgp_nexthop_cache *bncp, *bnct;
180 1 : afi_t afi;
181 1 : ifindex_t ifindex = 0;
182 :
183 1 : if (!sockunion2hostprefix(&from->su, &pp))
184 0 : return;
185 :
186 : /*
187 : * Gather the ifindex for if up/down events to be
188 : * tagged into this fun
189 : */
190 1 : if (from->conf_if && IN6_IS_ADDR_LINKLOCAL(&from->su.sin6.sin6_addr))
191 0 : ifindex = from->su.sin6.sin6_scope_id;
192 :
193 1 : afi = family2afi(pp.family);
194 1 : bncp = bnc_find(&from->bgp->nexthop_cache_table[afi], &pp, 0, ifindex);
195 :
196 1 : if (!sockunion2hostprefix(&to->su, &pt))
197 : return;
198 :
199 : /*
200 : * Gather the ifindex for if up/down events to be
201 : * tagged into this fun
202 : */
203 1 : ifindex = 0;
204 1 : if (to->conf_if && IN6_IS_ADDR_LINKLOCAL(&to->su.sin6.sin6_addr))
205 0 : ifindex = to->su.sin6.sin6_scope_id;
206 1 : bnct = bnc_find(&to->bgp->nexthop_cache_table[afi], &pt, 0, ifindex);
207 :
208 1 : if (bnct != bncp)
209 : return;
210 :
211 1 : if (bnct)
212 1 : bnct->nht_info = to;
213 : }
214 :
215 : /*
216 : * Returns the bnc whose bnc->nht_info matches the LL peer by
217 : * looping through the IPv6 nexthop table
218 : */
219 : static struct bgp_nexthop_cache *
220 0 : bgp_find_ipv6_nexthop_matching_peer(struct peer *peer)
221 : {
222 0 : struct bgp_nexthop_cache *bnc;
223 :
224 0 : frr_each (bgp_nexthop_cache, &peer->bgp->nexthop_cache_table[AFI_IP6],
225 : bnc) {
226 0 : if (bnc->nht_info == peer) {
227 0 : if (BGP_DEBUG(nht, NHT)) {
228 0 : zlog_debug(
229 : "Found bnc: %pFX(%u)(%u)(%p) for peer: %s(%s) %p",
230 : &bnc->prefix, bnc->ifindex,
231 : bnc->srte_color, bnc, peer->host,
232 : peer->bgp->name_pretty, peer);
233 : }
234 0 : return bnc;
235 : }
236 : }
237 :
238 0 : if (BGP_DEBUG(nht, NHT))
239 0 : zlog_debug(
240 : "Could not find bnc for peer %s(%s) %p in v6 nexthop table",
241 : peer->host, peer->bgp->name_pretty, peer);
242 :
243 : return NULL;
244 : }
245 :
246 2 : void bgp_unlink_nexthop_by_peer(struct peer *peer)
247 : {
248 2 : struct prefix p;
249 2 : struct bgp_nexthop_cache *bnc;
250 2 : afi_t afi = family2afi(peer->su.sa.sa_family);
251 2 : ifindex_t ifindex = 0;
252 :
253 2 : if (!sockunion2hostprefix(&peer->su, &p)) {
254 : /*
255 : * In scenarios where unnumbered BGP session is brought
256 : * down by shutting down the interface before unconfiguring
257 : * the BGP neighbor, neighbor information in peer->su.sa
258 : * will be cleared when the interface is shutdown. So
259 : * during the deletion of unnumbered bgp peer, above check
260 : * will return true. Therefore, in this case,BGP needs to
261 : * find the bnc whose bnc->nht_info matches the
262 : * peer being deleted and free it.
263 : */
264 0 : bnc = bgp_find_ipv6_nexthop_matching_peer(peer);
265 : } else {
266 : /*
267 : * Gather the ifindex for if up/down events to be
268 : * tagged into this fun
269 : */
270 2 : if (afi == AFI_IP6 &&
271 0 : IN6_IS_ADDR_LINKLOCAL(&peer->su.sin6.sin6_addr))
272 0 : ifindex = peer->su.sin6.sin6_scope_id;
273 2 : bnc = bnc_find(&peer->bgp->nexthop_cache_table[afi], &p, 0,
274 : ifindex);
275 : }
276 :
277 2 : if (!bnc)
278 0 : return;
279 :
280 : /* cleanup the peer reference */
281 2 : bnc->nht_info = NULL;
282 :
283 2 : bgp_unlink_nexthop_check(bnc);
284 : }
285 :
286 : /*
287 : * A route and its nexthop might belong to different VRFs. Therefore,
288 : * we need both the bgp_route and bgp_nexthop pointers.
289 : */
290 9 : int bgp_find_or_add_nexthop(struct bgp *bgp_route, struct bgp *bgp_nexthop,
291 : afi_t afi, safi_t safi, struct bgp_path_info *pi,
292 : struct peer *peer, int connected,
293 : const struct prefix *orig_prefix)
294 : {
295 9 : struct bgp_nexthop_cache_head *tree = NULL;
296 9 : struct bgp_nexthop_cache *bnc;
297 9 : struct bgp_path_info *bpi_ultimate;
298 9 : struct prefix p;
299 9 : uint32_t srte_color = 0;
300 9 : int is_bgp_static_route = 0;
301 9 : ifindex_t ifindex = 0;
302 :
303 9 : if (pi) {
304 2 : is_bgp_static_route = ((pi->type == ZEBRA_ROUTE_BGP)
305 2 : && (pi->sub_type == BGP_ROUTE_STATIC))
306 : ? 1
307 2 : : 0;
308 :
309 : /* Since Extended Next-hop Encoding (RFC5549) support, we want
310 : to derive
311 : address-family from the next-hop. */
312 2 : if (!is_bgp_static_route)
313 2 : afi = BGP_ATTR_MP_NEXTHOP_LEN_IP6(pi->attr) ? AFI_IP6
314 2 : : AFI_IP;
315 :
316 : /* Validation for the ipv4 mapped ipv6 nexthop. */
317 2 : if (IS_MAPPED_IPV6(&pi->attr->mp_nexthop_global)) {
318 2 : afi = AFI_IP;
319 : }
320 :
321 : /* This will return true if the global IPv6 NH is a link local
322 : * addr */
323 2 : if (make_prefix(afi, pi, &p) < 0)
324 : return 1;
325 :
326 2 : if (!is_bgp_static_route && orig_prefix
327 2 : && prefix_same(&p, orig_prefix)) {
328 0 : if (BGP_DEBUG(nht, NHT)) {
329 0 : zlog_debug(
330 : "%s(%pFX): prefix loops through itself",
331 : __func__, &p);
332 : }
333 0 : return 0;
334 : }
335 :
336 2 : srte_color = pi->attr->srte_color;
337 7 : } else if (peer) {
338 : /*
339 : * Gather the ifindex for if up/down events to be
340 : * tagged into this fun
341 : */
342 7 : if (afi == AFI_IP6 &&
343 0 : IN6_IS_ADDR_LINKLOCAL(&peer->su.sin6.sin6_addr)) {
344 0 : ifindex = peer->su.sin6.sin6_scope_id;
345 0 : if (ifindex == 0) {
346 0 : if (BGP_DEBUG(nht, NHT)) {
347 0 : zlog_debug(
348 : "%s: Unable to locate ifindex, waiting till we have one",
349 : peer->conf_if);
350 : }
351 0 : return 0;
352 : }
353 : }
354 :
355 7 : if (!sockunion2hostprefix(&peer->su, &p)) {
356 0 : if (BGP_DEBUG(nht, NHT)) {
357 0 : zlog_debug(
358 : "%s: Attempting to register with unknown AFI %d (not %d or %d)",
359 : __func__, afi, AFI_IP, AFI_IP6);
360 : }
361 0 : return 0;
362 : }
363 : } else
364 : return 0;
365 :
366 9 : if (is_bgp_static_route)
367 0 : tree = &bgp_nexthop->import_check_table[afi];
368 : else
369 9 : tree = &bgp_nexthop->nexthop_cache_table[afi];
370 :
371 9 : bnc = bnc_find(tree, &p, srte_color, ifindex);
372 9 : if (!bnc) {
373 2 : bnc = bnc_new(tree, &p, srte_color, ifindex);
374 2 : bnc->bgp = bgp_nexthop;
375 2 : if (BGP_DEBUG(nht, NHT))
376 0 : zlog_debug("Allocated bnc %pFX(%d)(%u)(%s) peer %p",
377 : &bnc->prefix, bnc->ifindex, bnc->srte_color,
378 : bnc->bgp->name_pretty, peer);
379 : } else {
380 7 : if (BGP_DEBUG(nht, NHT))
381 0 : zlog_debug(
382 : "Found existing bnc %pFX(%d)(%s) flags 0x%x ifindex %d #paths %d peer %p",
383 : &bnc->prefix, bnc->ifindex,
384 : bnc->bgp->name_pretty, bnc->flags, bnc->ifindex,
385 : bnc->path_count, bnc->nht_info);
386 : }
387 :
388 9 : if (pi && is_route_parent_evpn(pi))
389 0 : bnc->is_evpn_gwip_nexthop = true;
390 :
391 9 : if (is_bgp_static_route) {
392 0 : SET_FLAG(bnc->flags, BGP_STATIC_ROUTE);
393 :
394 : /* If we're toggling the type, re-register */
395 0 : if ((CHECK_FLAG(bgp_route->flags, BGP_FLAG_IMPORT_CHECK))
396 0 : && !CHECK_FLAG(bnc->flags, BGP_STATIC_ROUTE_EXACT_MATCH)) {
397 0 : SET_FLAG(bnc->flags, BGP_STATIC_ROUTE_EXACT_MATCH);
398 0 : UNSET_FLAG(bnc->flags, BGP_NEXTHOP_REGISTERED);
399 0 : UNSET_FLAG(bnc->flags, BGP_NEXTHOP_VALID);
400 0 : } else if ((!CHECK_FLAG(bgp_route->flags,
401 : BGP_FLAG_IMPORT_CHECK))
402 0 : && CHECK_FLAG(bnc->flags,
403 : BGP_STATIC_ROUTE_EXACT_MATCH)) {
404 0 : UNSET_FLAG(bnc->flags, BGP_STATIC_ROUTE_EXACT_MATCH);
405 0 : UNSET_FLAG(bnc->flags, BGP_NEXTHOP_REGISTERED);
406 0 : UNSET_FLAG(bnc->flags, BGP_NEXTHOP_VALID);
407 : }
408 : }
409 : /* When nexthop is already known, but now requires 'connected'
410 : * resolution,
411 : * re-register it. The reverse scenario where the nexthop currently
412 : * requires
413 : * 'connected' resolution does not need a re-register (i.e., we treat
414 : * 'connected-required' as an override) except in the scenario where
415 : * this
416 : * is actually a case of tracking a peer for connectivity (e.g., after
417 : * disable connected-check).
418 : * NOTE: We don't track the number of paths separately for 'connected-
419 : * required' vs 'connected-not-required' as this change is not a common
420 : * scenario.
421 : */
422 9 : else if (connected && !CHECK_FLAG(bnc->flags, BGP_NEXTHOP_CONNECTED)) {
423 2 : SET_FLAG(bnc->flags, BGP_NEXTHOP_CONNECTED);
424 2 : UNSET_FLAG(bnc->flags, BGP_NEXTHOP_REGISTERED);
425 2 : UNSET_FLAG(bnc->flags, BGP_NEXTHOP_VALID);
426 7 : } else if (peer && !connected
427 0 : && CHECK_FLAG(bnc->flags, BGP_NEXTHOP_CONNECTED)) {
428 0 : UNSET_FLAG(bnc->flags, BGP_NEXTHOP_CONNECTED);
429 0 : UNSET_FLAG(bnc->flags, BGP_NEXTHOP_REGISTERED);
430 0 : UNSET_FLAG(bnc->flags, BGP_NEXTHOP_VALID);
431 : }
432 9 : if (peer && (bnc->ifindex != ifindex)) {
433 0 : UNSET_FLAG(bnc->flags, BGP_NEXTHOP_REGISTERED);
434 0 : UNSET_FLAG(bnc->flags, BGP_NEXTHOP_VALID);
435 0 : bnc->ifindex = ifindex;
436 : }
437 9 : if (bgp_route->inst_type == BGP_INSTANCE_TYPE_VIEW) {
438 0 : SET_FLAG(bnc->flags, BGP_NEXTHOP_REGISTERED);
439 0 : SET_FLAG(bnc->flags, BGP_NEXTHOP_VALID);
440 9 : } else if (!CHECK_FLAG(bnc->flags, BGP_NEXTHOP_REGISTERED)
441 2 : && !is_default_host_route(&bnc->prefix))
442 2 : register_zebra_rnh(bnc);
443 :
444 9 : if (pi && pi->nexthop != bnc) {
445 : /* Unlink from existing nexthop cache, if any. This will also
446 : * free
447 : * the nexthop cache entry, if appropriate.
448 : */
449 2 : bgp_unlink_nexthop(pi);
450 :
451 : /* updates NHT pi list reference */
452 2 : path_nh_map(pi, bnc, true);
453 :
454 2 : bpi_ultimate = bgp_get_imported_bpi_ultimate(pi);
455 2 : if (CHECK_FLAG(bnc->flags, BGP_NEXTHOP_VALID) && bnc->metric)
456 0 : (bgp_path_info_extra_get(bpi_ultimate))->igpmetric =
457 0 : bnc->metric;
458 2 : else if (bpi_ultimate->extra)
459 0 : bpi_ultimate->extra->igpmetric = 0;
460 7 : } else if (peer) {
461 : /*
462 : * Let's not accidentally save the peer data for a peer
463 : * we are going to throw away in a second or so.
464 : * When we come back around we'll fix up this
465 : * data properly in replace_nexthop_by_peer
466 : */
467 7 : if (CHECK_FLAG(peer->flags, PEER_FLAG_CONFIG_NODE))
468 5 : bnc->nht_info = (void *)peer; /* NHT peer reference */
469 : }
470 :
471 : /*
472 : * We are cheating here. Views have no associated underlying
473 : * ability to detect nexthops. So when we have a view
474 : * just tell everyone the nexthop is valid
475 : */
476 9 : if (bgp_route->inst_type == BGP_INSTANCE_TYPE_VIEW)
477 : return 1;
478 9 : else if (safi == SAFI_UNICAST && pi &&
479 2 : pi->sub_type == BGP_ROUTE_IMPORTED && pi->extra &&
480 0 : pi->extra->num_labels && !bnc->is_evpn_gwip_nexthop)
481 0 : return bgp_isvalid_nexthop_for_mpls(bnc, pi);
482 : else
483 9 : return (bgp_isvalid_nexthop(bnc));
484 : }
485 :
486 1 : void bgp_delete_connected_nexthop(afi_t afi, struct peer *peer)
487 : {
488 1 : struct bgp_nexthop_cache *bnc;
489 1 : struct prefix p;
490 1 : ifindex_t ifindex = 0;
491 :
492 1 : if (!peer)
493 1 : return;
494 :
495 : /*
496 : * In case the below check evaluates true and if
497 : * the bnc has not been freed at this point, then
498 : * we might have to do something similar to what's
499 : * done in bgp_unlink_nexthop_by_peer(). Since
500 : * bgp_unlink_nexthop_by_peer() loops through the
501 : * nodes of V6 nexthop cache to find the bnc, it is
502 : * currently not being called here.
503 : */
504 1 : if (!sockunion2hostprefix(&peer->su, &p))
505 : return;
506 : /*
507 : * Gather the ifindex for if up/down events to be
508 : * tagged into this fun
509 : */
510 1 : if (afi == AFI_IP6 && IN6_IS_ADDR_LINKLOCAL(&peer->su.sin6.sin6_addr))
511 0 : ifindex = peer->su.sin6.sin6_scope_id;
512 1 : bnc = bnc_find(&peer->bgp->nexthop_cache_table[family2afi(p.family)],
513 : &p, 0, ifindex);
514 1 : if (!bnc) {
515 1 : if (BGP_DEBUG(nht, NHT))
516 0 : zlog_debug(
517 : "Cannot find connected NHT node for peer %s(%s)",
518 : peer->host, peer->bgp->name_pretty);
519 1 : return;
520 : }
521 :
522 0 : if (bnc->nht_info != peer) {
523 0 : if (BGP_DEBUG(nht, NHT))
524 0 : zlog_debug(
525 : "Connected NHT %p node for peer %s(%s) points to %p",
526 : bnc, peer->host, bnc->bgp->name_pretty,
527 : bnc->nht_info);
528 0 : return;
529 : }
530 :
531 0 : bnc->nht_info = NULL;
532 :
533 0 : if (LIST_EMPTY(&(bnc->paths))) {
534 0 : if (BGP_DEBUG(nht, NHT))
535 0 : zlog_debug(
536 : "Freeing connected NHT node %p for peer %s(%s)",
537 : bnc, peer->host, bnc->bgp->name_pretty);
538 0 : unregister_zebra_rnh(bnc);
539 0 : bnc_free(bnc);
540 : }
541 : }
542 :
543 2 : static void bgp_process_nexthop_update(struct bgp_nexthop_cache *bnc,
544 : struct zapi_route *nhr,
545 : bool import_check)
546 : {
547 2 : struct nexthop *nexthop;
548 2 : struct nexthop *oldnh;
549 2 : struct nexthop *nhlist_head = NULL;
550 2 : struct nexthop *nhlist_tail = NULL;
551 2 : int i;
552 2 : bool evpn_resolved = false;
553 :
554 2 : bnc->last_update = monotime(NULL);
555 2 : bnc->change_flags = 0;
556 :
557 : /* debug print the input */
558 2 : if (BGP_DEBUG(nht, NHT)) {
559 0 : char bnc_buf[BNC_FLAG_DUMP_SIZE];
560 :
561 0 : zlog_debug(
562 : "%s(%u): Rcvd NH update %pFX(%u)%u) - metric %d/%d #nhops %d/%d flags %s",
563 : bnc->bgp->name_pretty, bnc->bgp->vrf_id, &nhr->prefix,
564 : bnc->ifindex, bnc->srte_color, nhr->metric, bnc->metric,
565 : nhr->nexthop_num, bnc->nexthop_num,
566 : bgp_nexthop_dump_bnc_flags(bnc, bnc_buf,
567 : sizeof(bnc_buf)));
568 : }
569 :
570 2 : if (nhr->metric != bnc->metric)
571 0 : bnc->change_flags |= BGP_NEXTHOP_METRIC_CHANGED;
572 :
573 2 : if (nhr->nexthop_num != bnc->nexthop_num)
574 2 : bnc->change_flags |= BGP_NEXTHOP_CHANGED;
575 :
576 2 : if (import_check && (nhr->type == ZEBRA_ROUTE_BGP ||
577 0 : !prefix_same(&bnc->prefix, &nhr->prefix))) {
578 0 : SET_FLAG(bnc->change_flags, BGP_NEXTHOP_CHANGED);
579 0 : UNSET_FLAG(bnc->flags, BGP_NEXTHOP_VALID);
580 0 : UNSET_FLAG(bnc->flags, BGP_NEXTHOP_LABELED_VALID);
581 0 : UNSET_FLAG(bnc->flags, BGP_NEXTHOP_EVPN_INCOMPLETE);
582 :
583 0 : bnc_nexthop_free(bnc);
584 0 : bnc->nexthop = NULL;
585 :
586 0 : if (BGP_DEBUG(nht, NHT))
587 0 : zlog_debug(
588 : "%s: Import Check does not resolve to the same prefix for %pFX received %pFX or matching route is BGP",
589 : __func__, &bnc->prefix, &nhr->prefix);
590 2 : } else if (nhr->nexthop_num) {
591 2 : struct peer *peer = bnc->nht_info;
592 :
593 : /* notify bgp fsm if nbr ip goes from invalid->valid */
594 2 : if (!bnc->nexthop_num)
595 2 : UNSET_FLAG(bnc->flags, BGP_NEXTHOP_PEER_NOTIFIED);
596 :
597 2 : if (!bnc->is_evpn_gwip_nexthop)
598 2 : bnc->flags |= BGP_NEXTHOP_VALID;
599 2 : bnc->metric = nhr->metric;
600 2 : bnc->nexthop_num = nhr->nexthop_num;
601 :
602 2 : bnc->flags &= ~BGP_NEXTHOP_LABELED_VALID; /* check below */
603 :
604 4 : for (i = 0; i < nhr->nexthop_num; i++) {
605 2 : int num_labels = 0;
606 :
607 2 : nexthop = nexthop_from_zapi_nexthop(&nhr->nexthops[i]);
608 :
609 : /*
610 : * Turn on RA for the v6 nexthops
611 : * we receive from bgp. This is to allow us
612 : * to work with v4 routing over v6 nexthops
613 : */
614 2 : if (peer && !peer->ifp
615 2 : && CHECK_FLAG(peer->flags,
616 : PEER_FLAG_CAPABILITY_ENHE)
617 0 : && nhr->prefix.family == AF_INET6
618 0 : && nexthop->type != NEXTHOP_TYPE_BLACKHOLE) {
619 0 : struct interface *ifp;
620 :
621 0 : ifp = if_lookup_by_index(nexthop->ifindex,
622 : nexthop->vrf_id);
623 0 : if (ifp)
624 0 : zclient_send_interface_radv_req(
625 : zclient, nexthop->vrf_id, ifp,
626 : true,
627 : BGP_UNNUM_DEFAULT_RA_INTERVAL);
628 : }
629 : /* There is at least one label-switched path */
630 2 : if (nexthop->nh_label &&
631 0 : nexthop->nh_label->num_labels) {
632 :
633 0 : bnc->flags |= BGP_NEXTHOP_LABELED_VALID;
634 0 : num_labels = nexthop->nh_label->num_labels;
635 : }
636 :
637 2 : if (BGP_DEBUG(nht, NHT)) {
638 0 : char buf[NEXTHOP_STRLEN];
639 0 : zlog_debug(
640 : " nhop via %s (%d labels)",
641 : nexthop2str(nexthop, buf, sizeof(buf)),
642 : num_labels);
643 : }
644 :
645 2 : if (nhlist_tail) {
646 0 : nhlist_tail->next = nexthop;
647 0 : nhlist_tail = nexthop;
648 : } else {
649 : nhlist_tail = nexthop;
650 : nhlist_head = nexthop;
651 : }
652 :
653 : /* No need to evaluate the nexthop if we have already
654 : * determined
655 : * that there has been a change.
656 : */
657 2 : if (bnc->change_flags & BGP_NEXTHOP_CHANGED)
658 2 : continue;
659 :
660 0 : for (oldnh = bnc->nexthop; oldnh; oldnh = oldnh->next)
661 0 : if (nexthop_same(oldnh, nexthop))
662 : break;
663 :
664 0 : if (!oldnh)
665 0 : bnc->change_flags |= BGP_NEXTHOP_CHANGED;
666 : }
667 2 : bnc_nexthop_free(bnc);
668 2 : bnc->nexthop = nhlist_head;
669 :
670 : /*
671 : * Gateway IP nexthop is L3 reachable. Mark it as
672 : * BGP_NEXTHOP_VALID only if it is recursively resolved with a
673 : * remote EVPN RT-2.
674 : * Else, mark it as BGP_NEXTHOP_EVPN_INCOMPLETE.
675 : * When its mapping with EVPN RT-2 is established, unset
676 : * BGP_NEXTHOP_EVPN_INCOMPLETE and set BGP_NEXTHOP_VALID.
677 : */
678 2 : if (bnc->is_evpn_gwip_nexthop) {
679 0 : evpn_resolved = bgp_evpn_is_gateway_ip_resolved(bnc);
680 :
681 0 : if (BGP_DEBUG(nht, NHT))
682 0 : zlog_debug(
683 : "EVPN gateway IP %pFX recursive MAC/IP lookup %s",
684 : &bnc->prefix,
685 : (evpn_resolved ? "successful"
686 : : "failed"));
687 :
688 0 : if (evpn_resolved) {
689 0 : bnc->flags |= BGP_NEXTHOP_VALID;
690 0 : bnc->flags &= ~BGP_NEXTHOP_EVPN_INCOMPLETE;
691 0 : bnc->change_flags |= BGP_NEXTHOP_MACIP_CHANGED;
692 : } else {
693 0 : bnc->flags |= BGP_NEXTHOP_EVPN_INCOMPLETE;
694 0 : bnc->flags &= ~BGP_NEXTHOP_VALID;
695 : }
696 : }
697 : } else {
698 0 : bnc->flags &= ~BGP_NEXTHOP_EVPN_INCOMPLETE;
699 0 : bnc->flags &= ~BGP_NEXTHOP_VALID;
700 0 : bnc->flags &= ~BGP_NEXTHOP_LABELED_VALID;
701 0 : bnc->nexthop_num = nhr->nexthop_num;
702 :
703 : /* notify bgp fsm if nbr ip goes from valid->invalid */
704 0 : UNSET_FLAG(bnc->flags, BGP_NEXTHOP_PEER_NOTIFIED);
705 :
706 0 : bnc_nexthop_free(bnc);
707 0 : bnc->nexthop = NULL;
708 : }
709 :
710 2 : evaluate_paths(bnc);
711 2 : }
712 :
713 24 : static void bgp_nht_ifp_table_handle(struct bgp *bgp,
714 : struct bgp_nexthop_cache_head *table,
715 : struct interface *ifp, bool up)
716 : {
717 24 : struct bgp_nexthop_cache *bnc;
718 :
719 48 : frr_each (bgp_nexthop_cache, table, bnc) {
720 0 : if (bnc->ifindex != ifp->ifindex)
721 0 : continue;
722 :
723 0 : bnc->last_update = monotime(NULL);
724 0 : bnc->change_flags = 0;
725 :
726 : /*
727 : * For interface based routes ( ala the v6 LL routes
728 : * that this was written for ) the metric received
729 : * for the connected route is 0 not 1.
730 : */
731 0 : bnc->metric = 0;
732 0 : if (up) {
733 0 : SET_FLAG(bnc->flags, BGP_NEXTHOP_VALID);
734 0 : SET_FLAG(bnc->change_flags, BGP_NEXTHOP_CHANGED);
735 0 : bnc->nexthop_num = 1;
736 : } else {
737 0 : UNSET_FLAG(bnc->flags, BGP_NEXTHOP_PEER_NOTIFIED);
738 0 : UNSET_FLAG(bnc->flags, BGP_NEXTHOP_VALID);
739 0 : SET_FLAG(bnc->change_flags, BGP_NEXTHOP_CHANGED);
740 0 : bnc->nexthop_num = 0;
741 : }
742 :
743 0 : evaluate_paths(bnc);
744 : }
745 24 : }
746 6 : static void bgp_nht_ifp_handle(struct interface *ifp, bool up)
747 : {
748 6 : struct bgp *bgp;
749 :
750 6 : bgp = ifp->vrf->info;
751 6 : if (!bgp)
752 : return;
753 :
754 6 : bgp_nht_ifp_table_handle(bgp, &bgp->nexthop_cache_table[AFI_IP], ifp,
755 : up);
756 6 : bgp_nht_ifp_table_handle(bgp, &bgp->import_check_table[AFI_IP], ifp,
757 : up);
758 6 : bgp_nht_ifp_table_handle(bgp, &bgp->nexthop_cache_table[AFI_IP6], ifp,
759 : up);
760 6 : bgp_nht_ifp_table_handle(bgp, &bgp->import_check_table[AFI_IP6], ifp,
761 : up);
762 : }
763 :
/* Interface came up: run the nexthop tracking handler with up == true. */
void bgp_nht_ifp_up(struct interface *ifp)
{
	const bool up = true;

	bgp_nht_ifp_handle(ifp, up);
}
768 :
/* Interface went down: run the nexthop tracking handler with up == false. */
void bgp_nht_ifp_down(struct interface *ifp)
{
	const bool up = false;

	bgp_nht_ifp_handle(ifp, up);
}
773 :
774 0 : static void bgp_nht_ifp_initial(struct thread *thread)
775 : {
776 0 : ifindex_t ifindex = THREAD_VAL(thread);
777 0 : struct bgp *bgp = THREAD_ARG(thread);
778 0 : struct interface *ifp = if_lookup_by_index(ifindex, bgp->vrf_id);
779 :
780 0 : if (!ifp)
781 : return;
782 :
783 0 : if (BGP_DEBUG(nht, NHT))
784 0 : zlog_debug(
785 : "Handle NHT initial update for Intf %s(%d) status %s",
786 : ifp->name, ifp->ifindex, if_is_up(ifp) ? "up" : "down");
787 :
788 0 : if (if_is_up(ifp))
789 0 : bgp_nht_ifp_up(ifp);
790 : else
791 0 : bgp_nht_ifp_down(ifp);
792 : }
793 :
794 : /*
795 : * So the bnc code has the ability to handle interface up/down
796 : * events to properly handle v6 LL peering.
797 : * What is happening here:
798 : * The event system for peering expects the nht code to
799 : * report on the tracking events after we move to active
800 : * So let's give the system a chance to report on that event
801 : * in a manner that is expected.
802 : */
803 8 : void bgp_nht_interface_events(struct peer *peer)
804 : {
805 8 : struct bgp *bgp = peer->bgp;
806 8 : struct bgp_nexthop_cache_head *table;
807 8 : struct bgp_nexthop_cache *bnc;
808 8 : struct prefix p;
809 8 : ifindex_t ifindex = 0;
810 :
811 8 : if (!IN6_IS_ADDR_LINKLOCAL(&peer->su.sin6.sin6_addr))
812 8 : return;
813 :
814 0 : if (!sockunion2hostprefix(&peer->su, &p))
815 : return;
816 : /*
817 : * Gather the ifindex for if up/down events to be
818 : * tagged into this fun
819 : */
820 0 : if (peer->conf_if && IN6_IS_ADDR_LINKLOCAL(&peer->su.sin6.sin6_addr))
821 0 : ifindex = peer->su.sin6.sin6_scope_id;
822 :
823 0 : table = &bgp->nexthop_cache_table[AFI_IP6];
824 0 : bnc = bnc_find(table, &p, 0, ifindex);
825 0 : if (!bnc)
826 : return;
827 :
828 0 : if (bnc->ifindex)
829 0 : thread_add_event(bm->master, bgp_nht_ifp_initial, bnc->bgp,
830 : bnc->ifindex, NULL);
831 : }
832 :
833 2 : void bgp_parse_nexthop_update(int command, vrf_id_t vrf_id)
834 : {
835 2 : struct bgp_nexthop_cache_head *tree = NULL;
836 2 : struct bgp_nexthop_cache *bnc_nhc, *bnc_import;
837 2 : struct bgp *bgp;
838 2 : struct prefix match;
839 2 : struct zapi_route nhr;
840 2 : afi_t afi;
841 :
842 2 : bgp = bgp_lookup_by_vrf_id(vrf_id);
843 2 : if (!bgp) {
844 0 : flog_err(
845 : EC_BGP_NH_UPD,
846 : "parse nexthop update: instance not found for vrf_id %u",
847 : vrf_id);
848 0 : return;
849 : }
850 :
851 2 : if (!zapi_nexthop_update_decode(zclient->ibuf, &match, &nhr)) {
852 0 : zlog_err("%s[%s]: Failure to decode nexthop update", __func__,
853 : bgp->name_pretty);
854 0 : return;
855 : }
856 :
857 2 : afi = family2afi(match.family);
858 2 : tree = &bgp->nexthop_cache_table[afi];
859 :
860 2 : bnc_nhc = bnc_find(tree, &match, nhr.srte_color, 0);
861 2 : if (!bnc_nhc) {
862 0 : if (BGP_DEBUG(nht, NHT))
863 0 : zlog_debug(
864 : "parse nexthop update(%pFX(%u)(%s)): bnc info not found for nexthop cache",
865 : &nhr.prefix, nhr.srte_color, bgp->name_pretty);
866 : } else
867 2 : bgp_process_nexthop_update(bnc_nhc, &nhr, false);
868 :
869 2 : tree = &bgp->import_check_table[afi];
870 :
871 2 : bnc_import = bnc_find(tree, &match, nhr.srte_color, 0);
872 2 : if (!bnc_import) {
873 2 : if (BGP_DEBUG(nht, NHT))
874 0 : zlog_debug(
875 : "parse nexthop update(%pFX(%u)(%s)): bnc info not found for import check",
876 : &nhr.prefix, nhr.srte_color, bgp->name_pretty);
877 : } else
878 0 : bgp_process_nexthop_update(bnc_import, &nhr, true);
879 :
880 : /*
881 : * HACK: if any BGP route is dependant on an SR-policy that doesn't
882 : * exist, zebra will never send NH updates relative to that policy. In
883 : * that case, whenever we receive an update about a colorless NH, update
884 : * the corresponding colorful NHs that share the same endpoint but that
885 : * are inactive. This ugly hack should work around the problem at the
886 : * cost of a performance pernalty. Long term, what should be done is to
887 : * make zebra's RNH subsystem aware of SR-TE colors (like bgpd is),
888 : * which should provide a better infrastructure to solve this issue in
889 : * a more efficient and elegant way.
890 : */
891 2 : if (nhr.srte_color == 0 && bnc_nhc) {
892 2 : struct bgp_nexthop_cache *bnc_iter;
893 :
894 8 : frr_each (bgp_nexthop_cache, &bgp->nexthop_cache_table[afi],
895 : bnc_iter) {
896 2 : if (!prefix_same(&bnc_nhc->prefix, &bnc_iter->prefix) ||
897 2 : bnc_iter->srte_color == 0 ||
898 0 : CHECK_FLAG(bnc_iter->flags, BGP_NEXTHOP_VALID))
899 2 : continue;
900 :
901 0 : bgp_process_nexthop_update(bnc_iter, &nhr, false);
902 : }
903 : }
904 : }
905 :
906 : /*
907 : * Cleanup nexthop registration and status information for BGP nexthops
908 : * pertaining to this VRF. This is invoked upon VRF deletion.
909 : */
910 0 : void bgp_cleanup_nexthops(struct bgp *bgp)
911 : {
912 0 : for (afi_t afi = AFI_IP; afi < AFI_MAX; afi++) {
913 0 : struct bgp_nexthop_cache *bnc;
914 :
915 0 : frr_each (bgp_nexthop_cache, &bgp->nexthop_cache_table[afi],
916 : bnc) {
917 : /* Clear relevant flags. */
918 0 : UNSET_FLAG(bnc->flags, BGP_NEXTHOP_VALID);
919 0 : UNSET_FLAG(bnc->flags, BGP_NEXTHOP_REGISTERED);
920 0 : UNSET_FLAG(bnc->flags, BGP_NEXTHOP_PEER_NOTIFIED);
921 0 : UNSET_FLAG(bnc->flags, BGP_NEXTHOP_EVPN_INCOMPLETE);
922 : }
923 : }
924 0 : }
925 :
926 : /**
927 : * make_prefix - make a prefix structure from the path (essentially
928 : * path's node.
929 : */
930 2 : static int make_prefix(int afi, struct bgp_path_info *pi, struct prefix *p)
931 : {
932 :
933 2 : int is_bgp_static = ((pi->type == ZEBRA_ROUTE_BGP)
934 2 : && (pi->sub_type == BGP_ROUTE_STATIC))
935 : ? 1
936 2 : : 0;
937 2 : struct bgp_dest *net = pi->net;
938 2 : const struct prefix *p_orig = bgp_dest_get_prefix(net);
939 2 : struct in_addr ipv4;
940 :
941 2 : if (p_orig->family == AF_FLOWSPEC) {
942 0 : if (!pi->peer)
943 : return -1;
944 0 : return bgp_flowspec_get_first_nh(pi->peer->bgp,
945 : pi, p, afi);
946 : }
947 2 : memset(p, 0, sizeof(struct prefix));
948 2 : switch (afi) {
949 2 : case AFI_IP:
950 2 : p->family = AF_INET;
951 2 : if (is_bgp_static) {
952 0 : p->u.prefix4 = p_orig->u.prefix4;
953 0 : p->prefixlen = p_orig->prefixlen;
954 : } else {
955 2 : if (IS_MAPPED_IPV6(&pi->attr->mp_nexthop_global)) {
956 0 : ipv4_mapped_ipv6_to_ipv4(
957 0 : &pi->attr->mp_nexthop_global, &ipv4);
958 0 : p->u.prefix4 = ipv4;
959 0 : p->prefixlen = IPV4_MAX_BITLEN;
960 : } else {
961 2 : if (p_orig->family == AF_EVPN)
962 0 : p->u.prefix4 =
963 : pi->attr->mp_nexthop_global_in;
964 : else
965 2 : p->u.prefix4 = pi->attr->nexthop;
966 2 : p->prefixlen = IPV4_MAX_BITLEN;
967 : }
968 : }
969 : break;
970 0 : case AFI_IP6:
971 0 : p->family = AF_INET6;
972 :
973 0 : if (is_bgp_static) {
974 0 : p->u.prefix6 = p_orig->u.prefix6;
975 0 : p->prefixlen = p_orig->prefixlen;
976 : } else {
977 : /* If we receive MP_REACH nexthop with ::(LL)
978 : * or LL(LL), use LL address as nexthop cache.
979 : */
980 0 : if (pi->attr->mp_nexthop_len
981 : == BGP_ATTR_NHLEN_IPV6_GLOBAL_AND_LL
982 0 : && (IN6_IS_ADDR_UNSPECIFIED(
983 : &pi->attr->mp_nexthop_global)
984 0 : || IN6_IS_ADDR_LINKLOCAL(
985 : &pi->attr->mp_nexthop_global)))
986 0 : p->u.prefix6 = pi->attr->mp_nexthop_local;
987 : /* If we receive MR_REACH with (GA)::(LL)
988 : * then check for route-map to choose GA or LL
989 : */
990 0 : else if (pi->attr->mp_nexthop_len
991 : == BGP_ATTR_NHLEN_IPV6_GLOBAL_AND_LL) {
992 0 : if (pi->attr->mp_nexthop_prefer_global)
993 0 : p->u.prefix6 =
994 : pi->attr->mp_nexthop_global;
995 : else
996 0 : p->u.prefix6 =
997 : pi->attr->mp_nexthop_local;
998 : } else
999 0 : p->u.prefix6 = pi->attr->mp_nexthop_global;
1000 0 : p->prefixlen = IPV6_MAX_BITLEN;
1001 : }
1002 : break;
1003 0 : default:
1004 0 : if (BGP_DEBUG(nht, NHT)) {
1005 0 : zlog_debug(
1006 : "%s: Attempting to make prefix with unknown AFI %d (not %d or %d)",
1007 : __func__, afi, AFI_IP, AFI_IP6);
1008 : }
1009 : break;
1010 : }
1011 : return 0;
1012 : }
1013 :
1014 : /**
1015 : * sendmsg_zebra_rnh -- Format and send a nexthop register/Unregister
1016 : * command to Zebra.
1017 : * ARGUMENTS:
1018 : * struct bgp_nexthop_cache *bnc -- the nexthop structure.
1019 : * int command -- command to send to zebra
1020 : * RETURNS:
1021 : * void.
1022 : */
1023 4 : static void sendmsg_zebra_rnh(struct bgp_nexthop_cache *bnc, int command)
1024 : {
1025 4 : bool exact_match = false;
1026 4 : bool resolve_via_default = false;
1027 4 : int ret;
1028 :
1029 4 : if (!zclient)
1030 : return;
1031 :
1032 : /* Don't try to register if Zebra doesn't know of this instance. */
1033 4 : if (!IS_BGP_INST_KNOWN_TO_ZEBRA(bnc->bgp)) {
1034 0 : if (BGP_DEBUG(zebra, ZEBRA))
1035 0 : zlog_debug(
1036 : "%s: No zebra instance to talk to, not installing NHT entry",
1037 : __func__);
1038 0 : return;
1039 : }
1040 :
1041 4 : if (!bgp_zebra_num_connects()) {
1042 0 : if (BGP_DEBUG(zebra, ZEBRA))
1043 0 : zlog_debug(
1044 : "%s: We have not connected yet, cannot send nexthops",
1045 : __func__);
1046 : }
1047 4 : if (command == ZEBRA_NEXTHOP_REGISTER) {
1048 2 : if (CHECK_FLAG(bnc->flags, BGP_NEXTHOP_CONNECTED))
1049 2 : exact_match = true;
1050 2 : if (CHECK_FLAG(bnc->flags, BGP_STATIC_ROUTE_EXACT_MATCH))
1051 0 : resolve_via_default = true;
1052 : }
1053 :
1054 4 : if (BGP_DEBUG(zebra, ZEBRA))
1055 0 : zlog_debug("%s: sending cmd %s for %pFX (vrf %s)", __func__,
1056 : zserv_command_string(command), &bnc->prefix,
1057 : bnc->bgp->name_pretty);
1058 :
1059 8 : ret = zclient_send_rnh(zclient, command, &bnc->prefix, SAFI_UNICAST,
1060 : exact_match, resolve_via_default,
1061 4 : bnc->bgp->vrf_id);
1062 4 : if (ret == ZCLIENT_SEND_FAILURE) {
1063 0 : flog_warn(EC_BGP_ZEBRA_SEND,
1064 : "sendmsg_nexthop: zclient_send_message() failed");
1065 0 : return;
1066 : }
1067 :
1068 4 : if (command == ZEBRA_NEXTHOP_REGISTER)
1069 2 : SET_FLAG(bnc->flags, BGP_NEXTHOP_REGISTERED);
1070 2 : else if (command == ZEBRA_NEXTHOP_UNREGISTER)
1071 2 : UNSET_FLAG(bnc->flags, BGP_NEXTHOP_REGISTERED);
1072 : return;
1073 : }
1074 :
1075 : /**
1076 : * register_zebra_rnh - register a NH/route with Zebra for notification
1077 : * when the route or the route to the nexthop changes.
1078 : * ARGUMENTS:
1079 : * struct bgp_nexthop_cache *bnc
1080 : * RETURNS:
1081 : * void.
1082 : */
1083 2 : static void register_zebra_rnh(struct bgp_nexthop_cache *bnc)
1084 : {
1085 : /* Check if we have already registered */
1086 2 : if (bnc->flags & BGP_NEXTHOP_REGISTERED)
1087 : return;
1088 :
1089 2 : if (bnc->ifindex) {
1090 0 : SET_FLAG(bnc->flags, BGP_NEXTHOP_REGISTERED);
1091 0 : return;
1092 : }
1093 :
1094 2 : sendmsg_zebra_rnh(bnc, ZEBRA_NEXTHOP_REGISTER);
1095 : }
1096 :
1097 : /**
1098 : * unregister_zebra_rnh -- Unregister the route/nexthop from Zebra.
1099 : * ARGUMENTS:
1100 : * struct bgp_nexthop_cache *bnc
1101 : * RETURNS:
1102 : * void.
1103 : */
1104 2 : static void unregister_zebra_rnh(struct bgp_nexthop_cache *bnc)
1105 : {
1106 : /* Check if we have already registered */
1107 2 : if (!CHECK_FLAG(bnc->flags, BGP_NEXTHOP_REGISTERED))
1108 : return;
1109 :
1110 2 : if (bnc->ifindex) {
1111 0 : UNSET_FLAG(bnc->flags, BGP_NEXTHOP_REGISTERED);
1112 0 : return;
1113 : }
1114 :
1115 2 : sendmsg_zebra_rnh(bnc, ZEBRA_NEXTHOP_UNREGISTER);
1116 : }
1117 :
1118 : /**
1119 : * evaluate_paths - Evaluate the paths/nets associated with a nexthop.
1120 : * ARGUMENTS:
1121 : * struct bgp_nexthop_cache *bnc -- the nexthop structure.
1122 : * RETURNS:
1123 : * void.
1124 : */
1125 2 : void evaluate_paths(struct bgp_nexthop_cache *bnc)
1126 : {
1127 2 : struct bgp_dest *dest;
1128 2 : struct bgp_path_info *path;
1129 2 : struct bgp_path_info *bpi_ultimate;
1130 2 : int afi;
1131 2 : struct peer *peer = (struct peer *)bnc->nht_info;
1132 2 : struct bgp_table *table;
1133 2 : safi_t safi;
1134 2 : struct bgp *bgp_path;
1135 2 : const struct prefix *p;
1136 :
1137 2 : if (BGP_DEBUG(nht, NHT)) {
1138 0 : char bnc_buf[BNC_FLAG_DUMP_SIZE];
1139 0 : char chg_buf[BNC_FLAG_DUMP_SIZE];
1140 :
1141 0 : zlog_debug(
1142 : "NH update for %pFX(%d)(%u)(%s) - flags %s chgflags %s- evaluate paths",
1143 : &bnc->prefix, bnc->ifindex, bnc->srte_color,
1144 : bnc->bgp->name_pretty,
1145 : bgp_nexthop_dump_bnc_flags(bnc, bnc_buf,
1146 : sizeof(bnc_buf)),
1147 : bgp_nexthop_dump_bnc_change_flags(bnc, chg_buf,
1148 : sizeof(bnc_buf)));
1149 : }
1150 :
1151 2 : LIST_FOREACH (path, &(bnc->paths), nh_thread) {
1152 0 : if (!(path->type == ZEBRA_ROUTE_BGP
1153 0 : && ((path->sub_type == BGP_ROUTE_NORMAL)
1154 : || (path->sub_type == BGP_ROUTE_STATIC)
1155 0 : || (path->sub_type == BGP_ROUTE_IMPORTED))))
1156 0 : continue;
1157 :
1158 0 : dest = path->net;
1159 0 : assert(dest && bgp_dest_table(dest));
1160 0 : p = bgp_dest_get_prefix(dest);
1161 0 : afi = family2afi(p->family);
1162 0 : table = bgp_dest_table(dest);
1163 0 : safi = table->safi;
1164 :
1165 : /*
1166 : * handle routes from other VRFs (they can have a
1167 : * nexthop in THIS VRF). bgp_path is the bgp instance
1168 : * that owns the route referencing this nexthop.
1169 : */
1170 0 : bgp_path = table->bgp;
1171 :
1172 : /*
1173 : * Path becomes valid/invalid depending on whether the nexthop
1174 : * reachable/unreachable.
1175 : *
1176 : * In case of unicast routes that were imported from vpn
1177 : * and that have labels, they are valid only if there are
1178 : * nexthops with labels
1179 : *
1180 : * If the nexthop is EVPN gateway-IP,
1181 : * do not check for a valid label.
1182 : */
1183 :
1184 0 : bool bnc_is_valid_nexthop = false;
1185 0 : bool path_valid = false;
1186 :
1187 0 : if (safi == SAFI_UNICAST && path->sub_type == BGP_ROUTE_IMPORTED
1188 0 : && path->extra && path->extra->num_labels
1189 0 : && (path->attr->evpn_overlay.type
1190 : != OVERLAY_INDEX_GATEWAY_IP)) {
1191 0 : bnc_is_valid_nexthop =
1192 0 : bgp_isvalid_nexthop_for_mpls(bnc, path) ? true
1193 : : false;
1194 : } else {
1195 0 : if (bgp_update_martian_nexthop(
1196 0 : bnc->bgp, afi, safi, path->type,
1197 0 : path->sub_type, path->attr, dest)) {
1198 0 : if (BGP_DEBUG(nht, NHT))
1199 0 : zlog_debug(
1200 : "%s: prefix %pBD (vrf %s), ignoring path due to martian or self-next-hop",
1201 : __func__, dest, bgp_path->name);
1202 : } else
1203 0 : bnc_is_valid_nexthop =
1204 0 : bgp_isvalid_nexthop(bnc) ? true : false;
1205 : }
1206 :
1207 0 : if (BGP_DEBUG(nht, NHT)) {
1208 0 : if (dest->pdest)
1209 0 : zlog_debug(
1210 : "... eval path %d/%d %pBD RD %pRD %s flags 0x%x",
1211 : afi, safi, dest,
1212 : (struct prefix_rd *)bgp_dest_get_prefix(
1213 : dest->pdest),
1214 : bgp_path->name_pretty, path->flags);
1215 : else
1216 0 : zlog_debug(
1217 : "... eval path %d/%d %pBD %s flags 0x%x",
1218 : afi, safi, dest, bgp_path->name_pretty,
1219 : path->flags);
1220 : }
1221 :
1222 : /* Skip paths marked for removal or as history. */
1223 0 : if (CHECK_FLAG(path->flags, BGP_PATH_REMOVED)
1224 0 : || CHECK_FLAG(path->flags, BGP_PATH_HISTORY))
1225 0 : continue;
1226 :
1227 : /* Copy the metric to the path. Will be used for bestpath
1228 : * computation */
1229 0 : bpi_ultimate = bgp_get_imported_bpi_ultimate(path);
1230 0 : if (bgp_isvalid_nexthop(bnc) && bnc->metric)
1231 0 : (bgp_path_info_extra_get(bpi_ultimate))->igpmetric =
1232 0 : bnc->metric;
1233 0 : else if (bpi_ultimate->extra)
1234 0 : bpi_ultimate->extra->igpmetric = 0;
1235 :
1236 0 : if (CHECK_FLAG(bnc->change_flags, BGP_NEXTHOP_METRIC_CHANGED)
1237 : || CHECK_FLAG(bnc->change_flags, BGP_NEXTHOP_CHANGED)
1238 0 : || path->attr->srte_color != 0)
1239 0 : SET_FLAG(path->flags, BGP_PATH_IGP_CHANGED);
1240 :
1241 0 : path_valid = CHECK_FLAG(path->flags, BGP_PATH_VALID);
1242 0 : if (path_valid != bnc_is_valid_nexthop) {
1243 0 : if (path_valid) {
1244 : /* No longer valid, clear flag; also for EVPN
1245 : * routes, unimport from VRFs if needed.
1246 : */
1247 0 : bgp_aggregate_decrement(bgp_path, p, path, afi,
1248 : safi);
1249 0 : bgp_path_info_unset_flag(dest, path,
1250 : BGP_PATH_VALID);
1251 0 : if (safi == SAFI_EVPN &&
1252 0 : bgp_evpn_is_prefix_nht_supported(bgp_dest_get_prefix(dest)))
1253 0 : bgp_evpn_unimport_route(bgp_path,
1254 : afi, safi, bgp_dest_get_prefix(dest), path);
1255 : } else {
1256 : /* Path becomes valid, set flag; also for EVPN
1257 : * routes, import from VRFs if needed.
1258 : */
1259 0 : bgp_path_info_set_flag(dest, path,
1260 : BGP_PATH_VALID);
1261 0 : bgp_aggregate_increment(bgp_path, p, path, afi,
1262 : safi);
1263 0 : if (safi == SAFI_EVPN &&
1264 0 : bgp_evpn_is_prefix_nht_supported(bgp_dest_get_prefix(dest)))
1265 0 : bgp_evpn_import_route(bgp_path,
1266 : afi, safi, bgp_dest_get_prefix(dest), path);
1267 : }
1268 : }
1269 :
1270 0 : bgp_process(bgp_path, dest, afi, safi);
1271 : }
1272 :
1273 2 : if (peer) {
1274 2 : int valid_nexthops = bgp_isvalid_nexthop(bnc);
1275 :
1276 2 : if (valid_nexthops) {
1277 : /*
1278 : * Peering cannot occur across a blackhole nexthop
1279 : */
1280 2 : if (bnc->nexthop_num == 1 && bnc->nexthop
1281 2 : && bnc->nexthop->type == NEXTHOP_TYPE_BLACKHOLE) {
1282 0 : peer->last_reset = PEER_DOWN_WAITING_NHT;
1283 0 : valid_nexthops = 0;
1284 : } else
1285 2 : peer->last_reset = PEER_DOWN_WAITING_OPEN;
1286 : } else
1287 0 : peer->last_reset = PEER_DOWN_WAITING_NHT;
1288 :
1289 2 : if (!CHECK_FLAG(bnc->flags, BGP_NEXTHOP_PEER_NOTIFIED)) {
1290 2 : if (BGP_DEBUG(nht, NHT))
1291 0 : zlog_debug(
1292 : "%s: Updating peer (%s(%s)) status with NHT nexthops %d",
1293 : __func__, peer->host,
1294 : peer->bgp->name_pretty,
1295 : !!valid_nexthops);
1296 2 : bgp_fsm_nht_update(peer, !!valid_nexthops);
1297 2 : SET_FLAG(bnc->flags, BGP_NEXTHOP_PEER_NOTIFIED);
1298 : }
1299 : }
1300 :
1301 2 : RESET_FLAG(bnc->change_flags);
1302 2 : }
1303 :
1304 : /**
1305 : * path_nh_map - make or break path-to-nexthop association.
1306 : * ARGUMENTS:
1307 : * path - pointer to the path structure
1308 : * bnc - pointer to the nexthop structure
1309 : * make - if set, make the association. if unset, just break the existing
1310 : * association.
1311 : */
1312 4 : void path_nh_map(struct bgp_path_info *path, struct bgp_nexthop_cache *bnc,
1313 : bool make)
1314 : {
1315 4 : if (path->nexthop) {
1316 2 : LIST_REMOVE(path, nh_thread);
1317 2 : path->nexthop->path_count--;
1318 2 : path->nexthop = NULL;
1319 : }
1320 4 : if (make) {
1321 2 : LIST_INSERT_HEAD(&(bnc->paths), path, nh_thread);
1322 2 : path->nexthop = bnc;
1323 2 : path->nexthop->path_count++;
1324 : }
1325 4 : }
1326 :
1327 : /*
1328 : * This function is called to register nexthops to zebra
1329 : * as that we may have tried to install the nexthops
1330 : * before we actually have a zebra connection
1331 : */
1332 2 : void bgp_nht_register_nexthops(struct bgp *bgp)
1333 : {
1334 8 : for (afi_t afi = AFI_IP; afi < AFI_MAX; afi++) {
1335 6 : struct bgp_nexthop_cache *bnc;
1336 :
1337 12 : frr_each (bgp_nexthop_cache, &bgp->nexthop_cache_table[afi],
1338 : bnc) {
1339 0 : register_zebra_rnh(bnc);
1340 : }
1341 : }
1342 2 : }
1343 :
1344 0 : void bgp_nht_reg_enhe_cap_intfs(struct peer *peer)
1345 : {
1346 0 : struct bgp *bgp;
1347 0 : struct bgp_nexthop_cache *bnc;
1348 0 : struct nexthop *nhop;
1349 0 : struct interface *ifp;
1350 0 : struct prefix p;
1351 0 : ifindex_t ifindex = 0;
1352 :
1353 0 : if (peer->ifp)
1354 0 : return;
1355 :
1356 0 : bgp = peer->bgp;
1357 0 : if (!sockunion2hostprefix(&peer->su, &p)) {
1358 0 : zlog_warn("%s: Unable to convert sockunion to prefix for %s",
1359 : __func__, peer->host);
1360 0 : return;
1361 : }
1362 :
1363 0 : if (p.family != AF_INET6)
1364 : return;
1365 : /*
1366 : * Gather the ifindex for if up/down events to be
1367 : * tagged into this fun
1368 : */
1369 0 : if (peer->conf_if && IN6_IS_ADDR_LINKLOCAL(&peer->su.sin6.sin6_addr))
1370 0 : ifindex = peer->su.sin6.sin6_scope_id;
1371 :
1372 0 : bnc = bnc_find(&bgp->nexthop_cache_table[AFI_IP6], &p, 0, ifindex);
1373 0 : if (!bnc)
1374 : return;
1375 :
1376 0 : if (peer != bnc->nht_info)
1377 : return;
1378 :
1379 0 : for (nhop = bnc->nexthop; nhop; nhop = nhop->next) {
1380 0 : ifp = if_lookup_by_index(nhop->ifindex, nhop->vrf_id);
1381 :
1382 0 : if (!ifp)
1383 0 : continue;
1384 :
1385 0 : zclient_send_interface_radv_req(zclient,
1386 : nhop->vrf_id,
1387 : ifp, true,
1388 : BGP_UNNUM_DEFAULT_RA_INTERVAL);
1389 : }
1390 : }
1391 :
1392 0 : void bgp_nht_dereg_enhe_cap_intfs(struct peer *peer)
1393 : {
1394 0 : struct bgp *bgp;
1395 0 : struct bgp_nexthop_cache *bnc;
1396 0 : struct nexthop *nhop;
1397 0 : struct interface *ifp;
1398 0 : struct prefix p;
1399 0 : ifindex_t ifindex = 0;
1400 :
1401 0 : if (peer->ifp)
1402 0 : return;
1403 :
1404 0 : bgp = peer->bgp;
1405 :
1406 0 : if (!sockunion2hostprefix(&peer->su, &p)) {
1407 0 : zlog_warn("%s: Unable to convert sockunion to prefix for %s",
1408 : __func__, peer->host);
1409 0 : return;
1410 : }
1411 :
1412 0 : if (p.family != AF_INET6)
1413 : return;
1414 : /*
1415 : * Gather the ifindex for if up/down events to be
1416 : * tagged into this fun
1417 : */
1418 0 : if (peer->conf_if && IN6_IS_ADDR_LINKLOCAL(&peer->su.sin6.sin6_addr))
1419 0 : ifindex = peer->su.sin6.sin6_scope_id;
1420 :
1421 0 : bnc = bnc_find(&bgp->nexthop_cache_table[AFI_IP6], &p, 0, ifindex);
1422 0 : if (!bnc)
1423 : return;
1424 :
1425 0 : if (peer != bnc->nht_info)
1426 : return;
1427 :
1428 0 : for (nhop = bnc->nexthop; nhop; nhop = nhop->next) {
1429 0 : ifp = if_lookup_by_index(nhop->ifindex, nhop->vrf_id);
1430 :
1431 0 : if (!ifp)
1432 0 : continue;
1433 :
1434 0 : zclient_send_interface_radv_req(zclient, nhop->vrf_id, ifp, 0,
1435 : 0);
1436 : }
1437 : }
1438 :
1439 : /****************************************************************************
1440 : * L3 NHGs are used for fast failover of nexthops in the dplane. These are
1441 : * the APIs for allocating L3 NHG ids. Management of the L3 NHG itself is
1442 : * left to the application using it.
1443 : * PS: Currently EVPN host routes is the only app using L3 NHG for fast
1444 : * failover of remote ES links.
1445 : ***************************************************************************/
1446 : static bitfield_t bgp_nh_id_bitmap;
1447 : static uint32_t bgp_l3nhg_start;
1448 :
/*
 * Nexthop-group callbacks handed to nexthop_group_init().
 * XXX - BGP currently takes no action on any of them, so every handler
 * below is intentionally empty.
 */

/* A nexthop group with the given name was added. */
static void bgp_l3nhg_add_cb(const char *name)
{
	/* intentionally empty */
}

/* An existing nexthop group was modified. */
static void bgp_l3nhg_modify_cb(const struct nexthop_group_cmd *nhgc)
{
	/* intentionally empty */
}

/* A nexthop was added to a nexthop group. */
static void bgp_l3nhg_add_nexthop_cb(const struct nexthop_group_cmd *nhgc,
				     const struct nexthop *nhop)
{
	/* intentionally empty */
}

/* A nexthop was removed from a nexthop group. */
static void bgp_l3nhg_del_nexthop_cb(const struct nexthop_group_cmd *nhgc,
				     const struct nexthop *nhop)
{
	/* intentionally empty */
}

/* The nexthop group with the given name was deleted. */
static void bgp_l3nhg_del_cb(const char *name)
{
	/* intentionally empty */
}
1472 0 : static void bgp_l3nhg_zebra_init(void)
1473 : {
1474 0 : static bool bgp_l3nhg_zebra_inited;
1475 0 : if (bgp_l3nhg_zebra_inited)
1476 : return;
1477 :
1478 0 : bgp_l3nhg_zebra_inited = true;
1479 0 : bgp_l3nhg_start = zclient_get_nhg_start(ZEBRA_ROUTE_BGP);
1480 0 : nexthop_group_init(bgp_l3nhg_add_cb, bgp_l3nhg_modify_cb,
1481 : bgp_l3nhg_add_nexthop_cb, bgp_l3nhg_del_nexthop_cb,
1482 : bgp_l3nhg_del_cb);
1483 : }
1484 :
1485 :
1486 2 : void bgp_l3nhg_init(void)
1487 : {
1488 2 : uint32_t id_max;
1489 :
1490 2 : id_max = MIN(ZEBRA_NHG_PROTO_SPACING - 1, 16 * 1024);
1491 2 : bf_init(bgp_nh_id_bitmap, id_max);
1492 10 : bf_assign_zero_index(bgp_nh_id_bitmap);
1493 :
1494 2 : if (BGP_DEBUG(nht, NHT) || BGP_DEBUG(evpn_mh, EVPN_MH_ES))
1495 0 : zlog_debug("bgp l3_nhg range %u - %u", bgp_l3nhg_start + 1,
1496 : bgp_l3nhg_start + id_max);
1497 2 : }
1498 :
1499 2 : void bgp_l3nhg_finish(void)
1500 : {
1501 2 : bf_free(bgp_nh_id_bitmap);
1502 2 : }
1503 :
1504 0 : uint32_t bgp_l3nhg_id_alloc(void)
1505 : {
1506 0 : uint32_t nhg_id = 0;
1507 :
1508 0 : bgp_l3nhg_zebra_init();
1509 0 : bf_assign_index(bgp_nh_id_bitmap, nhg_id);
1510 0 : if (nhg_id)
1511 0 : nhg_id += bgp_l3nhg_start;
1512 :
1513 0 : return nhg_id;
1514 : }
1515 :
1516 0 : void bgp_l3nhg_id_free(uint32_t nhg_id)
1517 : {
1518 0 : if (!nhg_id || (nhg_id <= bgp_l3nhg_start))
1519 : return;
1520 :
1521 0 : nhg_id -= bgp_l3nhg_start;
1522 :
1523 0 : bf_release_index(bgp_nh_id_bitmap, nhg_id);
1524 : }
|