back to topotato report
topotato coverage report
Current view: top level - zebra - kernel_netlink.c (source / functions) Hit Total Coverage
Test: test_bgp_rmap_extcommunity_none.py::TestBGPExtCommunity Lines: 439 701 62.6 %
Date: 2023-02-24 18:37:31 Functions: 37 58 63.8 %

          Line data    Source code
       1             : /* Kernel communication using netlink interface.
       2             :  * Copyright (C) 1999 Kunihiro Ishiguro
       3             :  *
       4             :  * This file is part of GNU Zebra.
       5             :  *
       6             :  * GNU Zebra is free software; you can redistribute it and/or modify it
       7             :  * under the terms of the GNU General Public License as published by the
       8             :  * Free Software Foundation; either version 2, or (at your option) any
       9             :  * later version.
      10             :  *
      11             :  * GNU Zebra is distributed in the hope that it will be useful, but
      12             :  * WITHOUT ANY WARRANTY; without even the implied warranty of
      13             :  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
      14             :  * General Public License for more details.
      15             :  *
      16             :  * You should have received a copy of the GNU General Public License along
      17             :  * with this program; see the file COPYING; if not, write to the Free Software
      18             :  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
      19             :  */
      20             : 
      21             : #include <zebra.h>
      22             : 
      23             : #ifdef HAVE_NETLINK
      24             : 
      25             : #include "linklist.h"
      26             : #include "if.h"
      27             : #include "log.h"
      28             : #include "prefix.h"
      29             : #include "connected.h"
      30             : #include "table.h"
      31             : #include "memory.h"
      32             : #include "rib.h"
      33             : #include "thread.h"
      34             : #include "privs.h"
      35             : #include "nexthop.h"
      36             : #include "vrf.h"
      37             : #include "mpls.h"
      38             : #include "lib_errors.h"
      39             : #include "hash.h"
      40             : 
      41             : #include "zebra/zebra_router.h"
      42             : #include "zebra/zebra_ns.h"
      43             : #include "zebra/zebra_vrf.h"
      44             : #include "zebra/rt.h"
      45             : #include "zebra/debug.h"
      46             : #include "zebra/kernel_netlink.h"
      47             : #include "zebra/rt_netlink.h"
      48             : #include "zebra/if_netlink.h"
      49             : #include "zebra/rule_netlink.h"
      50             : #include "zebra/tc_netlink.h"
      51             : #include "zebra/netconf_netlink.h"
      52             : #include "zebra/zebra_errors.h"
      53             : 
                       /*
                        * Fallback definitions: each constant or macro in this group is defined
                        * here only when the included headers have not already provided it.
                        */
      54             : #ifndef SO_RCVBUFFORCE
      55             : #define SO_RCVBUFFORCE  (33)
      56             : #endif
      57             : 
      58             : /* Hack for GNU libc version 2. */
      59             : #ifndef MSG_TRUNC
      60             : #define MSG_TRUNC      0x20
      61             : #endif /* MSG_TRUNC */
      62             : 
                       /*
                        * Pointer, typed as struct rtattr *, to the first byte after netlink
                        * message nmsg once its nlmsg_len has been rounded up with NLMSG_ALIGN.
                        */
      63             : #ifndef NLMSG_TAIL
      64             : #define NLMSG_TAIL(nmsg)                                                       \
      65             :         ((struct rtattr *)(((uint8_t *)(nmsg))                                 \
      66             :                            + NLMSG_ALIGN((nmsg)->nlmsg_len)))
      67             : #endif
      68             : 
                       /*
                        * Same idea for an attribute: pointer to the first byte after rta once
                        * its rta_len has been rounded up with RTA_ALIGN.
                        */
      69             : #ifndef RTA_TAIL
      70             : #define RTA_TAIL(rta)                                                          \
      71             :         ((struct rtattr *)(((uint8_t *)(rta)) + RTA_ALIGN((rta)->rta_len)))
      72             : #endif
      73             : 
      74             : #ifndef RTNL_FAMILY_IP6MR
      75             : #define RTNL_FAMILY_IP6MR 129
      76             : #endif
      77             : 
      78             : #ifndef RTPROT_MROUTED
      79             : #define RTPROT_MROUTED 17
      80             : #endif
      81             : 
                       /*
                        * Default batch buffer size: 16 times NL_PKT_BUF_SIZE.  Used as the
                        * initial value of nl_batch_bufsize.
                        */
      82             : #define NL_DEFAULT_BATCH_BUFSIZE (16 * NL_PKT_BUF_SIZE)
      83             : 
      84             : /*
      85             :  * We limit the batch's size to a number smaller than the length of the
      86             :  * underlying buffer since the last message that wouldn't fit the batch would go
      87             :  * over the upper boundary and then it would have to be encoded again into a new
      88             :  * buffer. If the difference between the limit and the length of the buffer is
      89             :  * big enough (bigger than the biggest Netlink message) then this situation
      90             :  * won't occur.
                        *
                        * With the defaults the gap between buffer size and threshold is exactly
                        * one NL_PKT_BUF_SIZE (16 vs. 15 packet buffers).
      91             :  */
      92             : #define NL_DEFAULT_BATCH_SEND_THRESHOLD (15 * NL_PKT_BUF_SIZE)
      93             : 
                       /*
                        * Printable names for rtnetlink message types, keyed by the RTM_* value.
                        * The array is terminated by a zero-initialised {0} entry.
                        * NOTE(review): presumably the table consulted by nl_msg_type_to_str(),
                        * which is not visible in this part of the file -- confirm.
                        */
      94             : static const struct message nlmsg_str[] = {{RTM_NEWROUTE, "RTM_NEWROUTE"},
      95             :                                            {RTM_DELROUTE, "RTM_DELROUTE"},
      96             :                                            {RTM_GETROUTE, "RTM_GETROUTE"},
      97             :                                            {RTM_NEWLINK, "RTM_NEWLINK"},
      98             :                                            {RTM_SETLINK, "RTM_SETLINK"},
      99             :                                            {RTM_DELLINK, "RTM_DELLINK"},
     100             :                                            {RTM_GETLINK, "RTM_GETLINK"},
     101             :                                            {RTM_NEWADDR, "RTM_NEWADDR"},
     102             :                                            {RTM_DELADDR, "RTM_DELADDR"},
     103             :                                            {RTM_GETADDR, "RTM_GETADDR"},
     104             :                                            {RTM_NEWNEIGH, "RTM_NEWNEIGH"},
     105             :                                            {RTM_DELNEIGH, "RTM_DELNEIGH"},
     106             :                                            {RTM_GETNEIGH, "RTM_GETNEIGH"},
     107             :                                            {RTM_NEWRULE, "RTM_NEWRULE"},
     108             :                                            {RTM_DELRULE, "RTM_DELRULE"},
     109             :                                            {RTM_GETRULE, "RTM_GETRULE"},
     110             :                                            {RTM_NEWNEXTHOP, "RTM_NEWNEXTHOP"},
     111             :                                            {RTM_DELNEXTHOP, "RTM_DELNEXTHOP"},
     112             :                                            {RTM_GETNEXTHOP, "RTM_GETNEXTHOP"},
     113             :                                            {RTM_NEWNETCONF, "RTM_NEWNETCONF"},
     114             :                                            {RTM_DELNETCONF, "RTM_DELNETCONF"},
     115             :                                            {RTM_NEWTUNNEL, "RTM_NEWTUNNEL"},
     116             :                                            {RTM_DELTUNNEL, "RTM_DELTUNNEL"},
     117             :                                            {RTM_GETTUNNEL, "RTM_GETTUNNEL"},
     118             :                                            {RTM_NEWQDISC, "RTM_NEWQDISC"},
     119             :                                            {RTM_DELQDISC, "RTM_DELQDISC"},
     120             :                                            {RTM_GETQDISC, "RTM_GETQDISC"},
     121             :                                            {RTM_NEWTCLASS, "RTM_NEWTCLASS"},
     122             :                                            {RTM_DELTCLASS, "RTM_DELTCLASS"},
     123             :                                            {RTM_GETTCLASS, "RTM_GETTCLASS"},
     124             :                                            {RTM_NEWTFILTER, "RTM_NEWTFILTER"},
     125             :                                            {RTM_DELTFILTER, "RTM_DELTFILTER"},
     126             :                                            {RTM_GETTFILTER, "RTM_GETTFILTER"},
     127             :                                            {0}};
     128             : 
                       /*
                        * Printable names for route protocol identifiers (RTPROT_* values),
                        * terminated by a {0} entry.  RTPROT_STATIC and RTPROT_ZSTATIC both map
                        * to "static"; the BIRD entry is compiled in only when RTPROT_BIRD is
                        * defined.
                        */
     129             : static const struct message rtproto_str[] = {
     130             :         {RTPROT_REDIRECT, "redirect"},
     131             :         {RTPROT_KERNEL, "kernel"},
     132             :         {RTPROT_BOOT, "boot"},
     133             :         {RTPROT_STATIC, "static"},
     134             :         {RTPROT_GATED, "GateD"},
     135             :         {RTPROT_RA, "router advertisement"},
     136             :         {RTPROT_MRT, "MRT"},
     137             :         {RTPROT_ZEBRA, "Zebra"},
     138             : #ifdef RTPROT_BIRD
     139             :         {RTPROT_BIRD, "BIRD"},
     140             : #endif /* RTPROT_BIRD */
     141             :         {RTPROT_MROUTED, "mroute"},
     142             :         {RTPROT_BGP, "BGP"},
     143             :         {RTPROT_OSPF, "OSPF"},
     144             :         {RTPROT_ISIS, "IS-IS"},
     145             :         {RTPROT_RIP, "RIP"},
     146             :         {RTPROT_RIPNG, "RIPNG"},
     147             :         {RTPROT_ZSTATIC, "static"},
     148             :         {0}};
     149             : 
                       /*
                        * Printable names for the address families listed below (AF_* and
                        * RTNL_FAMILY_* values), terminated by a {0} entry.
                        */
     150             : static const struct message family_str[] = {{AF_INET, "ipv4"},
     151             :                                             {AF_INET6, "ipv6"},
     152             :                                             {AF_BRIDGE, "bridge"},
     153             :                                             {RTNL_FAMILY_IPMR, "ipv4MR"},
     154             :                                             {RTNL_FAMILY_IP6MR, "ipv6MR"},
     155             :                                             {0}};
     156             : 
                       /*
                        * Printable names for route types (RTN_* values), terminated by a {0}
                        * entry.
                        */
     157             : static const struct message rttype_str[] = {{RTN_UNSPEC, "none"},
     158             :                                             {RTN_UNICAST, "unicast"},
     159             :                                             {RTN_LOCAL, "local"},
     160             :                                             {RTN_BROADCAST, "broadcast"},
     161             :                                             {RTN_ANYCAST, "anycast"},
     162             :                                             {RTN_MULTICAST, "multicast"},
     163             :                                             {RTN_BLACKHOLE, "blackhole"},
     164             :                                             {RTN_UNREACHABLE, "unreachable"},
     165             :                                             {RTN_PROHIBIT, "prohibited"},
     166             :                                             {RTN_THROW, "throw"},
     167             :                                             {RTN_NAT, "nat"},
     168             :                                             {RTN_XRESOLVE, "resolver"},
     169             :                                             {0}};
     170             : 
                       /* Defined elsewhere; declared here for use in this file. */
     171             : extern struct thread_master *master;
     172             : 
                       /*
                        * Privilege state defined elsewhere; this file wraps privileged socket
                        * calls in frr_with_privs(&zserv_privs).
                        */
     173             : extern struct zebra_privs_t zserv_privs;
     174             : 
                       /* Memory type under which netlink_socket() allocates nl->buf. */
     175           6 : DEFINE_MTYPE_STATIC(ZEBRA, NL_BUF, "Zebra Netlink buffers");
     176             : 
     177             : /* Hashtable and mutex to allow lookup of nlsock structs by socket/fd value.
     178             :  * We have both the main and dplane pthreads using these structs, so we have
     179             :  * to protect the hash with a lock.
     180             :  */
     181             : static struct hash *nlsock_hash;
     182             : pthread_mutex_t nlsock_mutex;
     183             : 
     184             : /* Lock and unlock wrappers for nlsock hash */
     185             : #define NLSOCK_LOCK() pthread_mutex_lock(&nlsock_mutex)
     186             : #define NLSOCK_UNLOCK() pthread_mutex_unlock(&nlsock_mutex)
     187             : 
                       /*
                        * NOTE(review): presumably the batch transmit buffer and its allocated
                        * size; nothing in the visible part of the file reads or writes these
                        * two variables -- confirm against the batch code.
                        */
     188             : size_t nl_batch_tx_bufsize;
     189             : char *nl_batch_tx_buf;
     190             : 
                       /*
                        * Configured batch buffer size and send threshold, initialised to the
                        * defaults.  netlink_config_write_helper() reads them and
                        * netlink_set_batch_buffer_size() writes them, in both cases with
                        * relaxed atomic operations.
                        */
     191             : _Atomic uint32_t nl_batch_bufsize = NL_DEFAULT_BATCH_BUFSIZE;
     192             : _Atomic uint32_t nl_batch_send_threshold = NL_DEFAULT_BATCH_SEND_THRESHOLD;
     193             : 
                       /*
                        * Bookkeeping for one batch of netlink messages.
                        *
                        * NOTE(review): no code visible in this part of the file uses the
                        * struct, so the field roles below are inferred from names only and
                        * must be confirmed against the batch helpers: buf/bufsiz presumably
                        * describe the underlying buffer, limit the send threshold, buf_head
                        * and curlen the current write position and used length, msgcnt the
                        * number of messages queued, zns the target namespace info, and
                        * ctx_list the dataplane contexts belonging to the queued messages.
                        */
     194             : struct nl_batch {
     195             :         void *buf;
     196             :         size_t bufsiz;
     197             :         size_t limit;
     198             : 
     199             :         void *buf_head;
     200             :         size_t curlen;
     201             :         size_t msgcnt;
     202             : 
     203             :         const struct zebra_dplane_info *zns;
     204             : 
     205             :         struct dplane_ctx_list_head ctx_list;
     206             : 
     207             :         /*
     208             :          * Pointer to the queue of completed contexts outbound back
     209             :          * towards the dataplane module.
     210             :          */
     211             :         struct dplane_ctx_list_head *ctx_out_q;
     212             : };
     213             : 
                       /*
                        * Write the netlink-related configuration lines to vty.
                        *
                        * "zebra kernel netlink batch-tx-buf <size> <threshold>" is written only
                        * when the current size or threshold differs from its default, and
                        * "zebra protodown reason-bit <bit>" only when
                        * if_netlink_frr_protodown_r_bit_is_set() reports true.
                        *
                        * Always returns 0.
                        */
     214           0 : int netlink_config_write_helper(struct vty *vty)
     215             : {
                               /* Snapshot both settings with relaxed atomic loads. */
     216           0 :         uint32_t size =
     217           0 :                 atomic_load_explicit(&nl_batch_bufsize, memory_order_relaxed);
     218           0 :         uint32_t threshold = atomic_load_explicit(&nl_batch_send_threshold,
     219             :                                                   memory_order_relaxed);
     220             : 
     221           0 :         if (size != NL_DEFAULT_BATCH_BUFSIZE
     222           0 :             || threshold != NL_DEFAULT_BATCH_SEND_THRESHOLD)
     223           0 :                 vty_out(vty, "zebra kernel netlink batch-tx-buf %u %u\n", size,
     224             :                         threshold);
     225             : 
     226           0 :         if (if_netlink_frr_protodown_r_bit_is_set())
     227           0 :                 vty_out(vty, "zebra protodown reason-bit %u\n",
     228           0 :                         if_netlink_get_frr_protodown_r_bit());
     229             : 
     230           0 :         return 0;
     231             : }
     232             : 
                       /*
                        * Update the batch buffer size and send threshold.
                        *
                        * size, threshold: new values, used only when set is true.
                        * set: when false, both arguments are ignored and the defaults
                        *      (NL_DEFAULT_BATCH_BUFSIZE / NL_DEFAULT_BATCH_SEND_THRESHOLD) are
                        *      stored instead.
                        *
                        * The two values are written with two separate relaxed atomic stores;
                        * no relationship between size and threshold is checked here.
                        */
     233           0 : void netlink_set_batch_buffer_size(uint32_t size, uint32_t threshold, bool set)
     234             : {
     235           0 :         if (!set) {
     236           0 :                 size = NL_DEFAULT_BATCH_BUFSIZE;
     237           0 :                 threshold = NL_DEFAULT_BATCH_SEND_THRESHOLD;
     238             :         }
     239             : 
     240           0 :         atomic_store_explicit(&nl_batch_bufsize, size, memory_order_relaxed);
     241           0 :         atomic_store_explicit(&nl_batch_send_threshold, threshold,
     242             :                               memory_order_relaxed);
     243           0 : }
     244             : 
                       /*
                        * Message callback that handles nothing: it writes a debug log entry
                        * with the message type (numeric and as a string) and ns_id, then
                        * returns 0.  The startup argument is not used.
                        */
     245           0 : int netlink_talk_filter(struct nlmsghdr *h, ns_id_t ns_id, int startup)
     246             : {
     247             :         /*
     248             :          * This is an error condition that must be handled during
     249             :          * development.
     250             :          *
     251             :          * The netlink_talk_filter function is used for communication
     252             :          * down the netlink_cmd pipe and we are expecting
     253             :          * an ack being received.  So if we get here
     254             :          * then we did not receive the ack and instead
     255             :          * received some other message in an unexpected
     256             :          * way.
     257             :          */
     258           0 :         zlog_debug("%s: ignoring message type 0x%04x(%s) NS %u", __func__,
     259             :                    h->nlmsg_type, nl_msg_type_to_str(h->nlmsg_type), ns_id);
     260           0 :         return 0;
     261             : }
     262             : 
                       /*
                        * Set the receive buffer size of nl's socket.
                        *
                        * SO_RCVBUFFORCE is tried first inside frr_with_privs(); if that fails
                        * the plain SO_RCVBUF option is used.  The size is read back before
                        * and after the change.
                        *
                        * Returns 0 on success, or -1 (after logging an error) when one of the
                        * getsockopt() calls or the SO_RCVBUF fallback fails.
                        *
                        * NOTE(review): the value applied is rcvbufsize, which is not declared
                        * in this function (presumably a global from a zebra header -- confirm).
                        * The incoming value of newsize is never read; the parameter is only
                        * overwritten by the final getsockopt().  Neither oldsize nor the
                        * re-read newsize is used afterwards.
                        */
     263           8 : static int netlink_recvbuf(struct nlsock *nl, uint32_t newsize)
     264             : {
     265           8 :         uint32_t oldsize;
     266           8 :         socklen_t newlen = sizeof(newsize);
     267           8 :         socklen_t oldlen = sizeof(oldsize);
     268           8 :         int ret;
     269             : 
     270           8 :         ret = getsockopt(nl->sock, SOL_SOCKET, SO_RCVBUF, &oldsize, &oldlen);
     271           8 :         if (ret < 0) {
     272           0 :                 flog_err_sys(EC_LIB_SOCKET,
     273             :                              "Can't get %s receive buffer size: %s", nl->name,
     274             :                              safe_strerror(errno));
     275           0 :                 return -1;
     276             :         }
     277             : 
     278             :         /* Try force option (linux >= 2.6.14) and fall back to normal set */
     279           8 :         frr_with_privs(&zserv_privs) {
     280           8 :                 ret = setsockopt(nl->sock, SOL_SOCKET, SO_RCVBUFFORCE,
     281             :                                  &rcvbufsize, sizeof(rcvbufsize));
     282             :         }
     283           8 :         if (ret < 0)
     284           8 :                 ret = setsockopt(nl->sock, SOL_SOCKET, SO_RCVBUF, &rcvbufsize,
     285             :                                  sizeof(rcvbufsize));
     286           8 :         if (ret < 0) {
     287           0 :                 flog_err_sys(EC_LIB_SOCKET,
     288             :                              "Can't set %s receive buffer size: %s", nl->name,
     289             :                              safe_strerror(errno));
     290           0 :                 return -1;
     291             :         }
     292             : 
                               /* Read the size back after the change. */
     293           8 :         ret = getsockopt(nl->sock, SOL_SOCKET, SO_RCVBUF, &newsize, &newlen);
     294           8 :         if (ret < 0) {
     295           0 :                 flog_err_sys(EC_LIB_SOCKET,
     296             :                              "Can't get %s receive buffer size: %s", nl->name,
     297             :                              safe_strerror(errno));
     298           0 :                 return -1;
     299             :         }
     300             :         return 0;
     301             : }
     302             : 
                       /*
                        * Return a printable name for a netlink group id.  Only RTNLGRP_TUNNEL
                        * is named; every other value yields "UNKNOWN".  The returned strings
                        * are literals.
                        */
     303           0 : static const char *group2str(uint32_t group)
     304             : {
     305           0 :         switch (group) {
     306             :         case RTNLGRP_TUNNEL:
     307             :                 return "RTNLGRP_TUNNEL";
     308           0 :         default:
     309           0 :                 return "UNKNOWN";
     310             :         }
     311             : }
     312             : 
     313             : /*
                        * Make socket for Linux netlink interface.
                        *
                        * nl:             filled in on success (snl, sock, buflen, buf); nl->name
                        *                 is used in log messages.
                        * groups:         group bitmask stored in snl.nl_groups before bind().
                        * ext_groups:     group ids joined one by one with
                        *                 NETLINK_ADD_MEMBERSHIP; a failure here is only
                        *                 logged, it does not abort socket creation.
                        * ext_group_size: number of entries in ext_groups.
                        * ns_id:          passed to ns_socket() when opening the socket.
                        *
                        * The socket is opened with ns_socket(AF_NETLINK, SOCK_RAW,
                        * NETLINK_ROUTE, ns_id) and bound inside frr_with_privs().  After
                        * bind(), getsockname() is used to read the address back, and a receive
                        * buffer of NL_RCV_PKT_BUF_SIZE bytes is allocated.
                        *
                        * Returns -1 when opening, binding or getsockname() fails (the socket is
                        * closed on the latter two); otherwise returns the getsockname() result.
                        */
     314           8 : static int netlink_socket(struct nlsock *nl, unsigned long groups,
     315             :                           uint32_t ext_groups[], uint8_t ext_group_size,
     316             :                           ns_id_t ns_id)
     317             : {
     318           8 :         int ret;
     319           8 :         struct sockaddr_nl snl;
     320           8 :         int sock;
     321           8 :         int namelen;
     322             : 
     323          16 :         frr_with_privs(&zserv_privs) {
     324           8 :                 sock = ns_socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE, ns_id);
     325           8 :                 if (sock < 0) {
     326           0 :                         zlog_err("Can't open %s socket: %s", nl->name,
     327             :                                  safe_strerror(errno));
                                               /*
                                                * NOTE(review): this returns from inside the
                                                * frr_with_privs() block; presumably the macro
                                                * handles early exit -- confirm.
                                                */
     328           0 :                         return -1;
     329             :                 }
     330             : 
     331           8 :                 memset(&snl, 0, sizeof(snl));
     332           8 :                 snl.nl_family = AF_NETLINK;
     333           8 :                 snl.nl_groups = groups;
     334             : 
     335           8 :                 if (ext_group_size) {
     336             :                         uint8_t i;
     337             : 
     338           4 :                         for (i = 0; i < ext_group_size; i++) {
     339             : #if defined SOL_NETLINK
                                                       /*
                                                        * A failure is logged only; ret is
                                                        * overwritten by bind() below.
                                                        */
     340           4 :                                 ret = setsockopt(sock, SOL_NETLINK,
     341             :                                                  NETLINK_ADD_MEMBERSHIP,
     342           2 :                                                  &ext_groups[i],
     343             :                                                  sizeof(ext_groups[i]));
     344           2 :                                 if (ret < 0) {
     345           2 :                                         zlog_notice(
     346             :                                                 "can't setsockopt NETLINK_ADD_MEMBERSHIP for group %s(%u), this linux kernel does not support it: %s(%d)",
     347             :                                                 group2str(ext_groups[i]),
     348             :                                                 ext_groups[i],
     349             :                                                 safe_strerror(errno), errno);
     350             :                                 }
     351             : #else
     352             :                                 zlog_notice(
     353             :                                         "Unable to use NETLINK_ADD_MEMBERSHIP via SOL_NETLINK for %s(%u) since the linux kernel does not support the socket option",
     354             :                                         group2str(ext_groups[i]),
     355             :                                         ext_groups[i]);
     356             : #endif
     357             :                         }
     358             :                 }
     359             : 
     360             :                 /* Bind the socket to the netlink structure for anything. */
     361           8 :                 ret = bind(sock, (struct sockaddr *)&snl, sizeof(snl));
     362             :         }
     363             : 
     364           8 :         if (ret < 0) {
     365           0 :                 zlog_err("Can't bind %s socket to group 0x%x: %s", nl->name,
     366             :                          snl.nl_groups, safe_strerror(errno));
     367           0 :                 close(sock);
     368           0 :                 return -1;
     369             :         }
     370             : 
     371             :         /* multiple netlink sockets will have different nl_pid */
     372           8 :         namelen = sizeof(snl);
     373           8 :         ret = getsockname(sock, (struct sockaddr *)&snl, (socklen_t *)&namelen);
     374           8 :         if (ret < 0 || namelen != sizeof(snl)) {
     375           0 :                 flog_err_sys(EC_LIB_SOCKET, "Can't get %s socket name: %s",
     376             :                              nl->name, safe_strerror(errno));
     377           0 :                 close(sock);
     378           0 :                 return -1;
     379             :         }
     380             : 
                               /* Record the address read back, the fd and a new receive buffer. */
     381           8 :         nl->snl = snl;
     382           8 :         nl->sock = sock;
     383           8 :         nl->buflen = NL_RCV_PKT_BUF_SIZE;
     384           8 :         nl->buf = XMALLOC(MTYPE_NL_BUF, nl->buflen);
     385             : 
     386           8 :         return ret;
     387             : }
     388             : 
     389             : /*
     390             :  * Dispatch an incoming netlink message; used by the zebra main pthread's
     391             :  * netlink event reader.
                        *
                        * The message type selects the handler.  Route, link, rule, nexthop,
                        * qdisc, tclass and tfilter handlers receive (h, ns_id, startup); the
                        * neighbor handler receives only (h, ns_id).
                        *
                        * Returns the handler's result for dispatched types.  Returns 0 without
                        * doing anything for the types listed as handled in the dplane thread,
                        * and 0 after logging an error for any other type.
     392             :  */
     393          14 : static int netlink_information_fetch(struct nlmsghdr *h, ns_id_t ns_id,
     394             :                                      int startup)
     395             : {
     396             :         /*
     397             :          * When we handle new message types here
     398             :          * because we are starting to install them
     399             :          * then let's check the netlink_install_filter
     400             :          * and see if we should add the corresponding
     401             :          * allow through entry there.
     402             :          * Probably not needed to do but please
     403             :          * think about it.
     404             :          */
     405          14 :         switch (h->nlmsg_type) {
     406           6 :         case RTM_NEWROUTE:
     407           6 :                 return netlink_route_change(h, ns_id, startup);
     408           0 :         case RTM_DELROUTE:
     409           0 :                 return netlink_route_change(h, ns_id, startup);
     410           3 :         case RTM_NEWLINK:
     411           3 :                 return netlink_link_change(h, ns_id, startup);
     412           0 :         case RTM_DELLINK:
     413           0 :                 return netlink_link_change(h, ns_id, startup);
     414           3 :         case RTM_NEWNEIGH:
     415             :         case RTM_DELNEIGH:
     416             :         case RTM_GETNEIGH:
     417           3 :                 return netlink_neigh_change(h, ns_id);
     418           0 :         case RTM_NEWRULE:
     419           0 :                 return netlink_rule_change(h, ns_id, startup);
     420           0 :         case RTM_DELRULE:
     421           0 :                 return netlink_rule_change(h, ns_id, startup);
     422           0 :         case RTM_NEWNEXTHOP:
     423           0 :                 return netlink_nexthop_change(h, ns_id, startup);
     424           0 :         case RTM_DELNEXTHOP:
     425           0 :                 return netlink_nexthop_change(h, ns_id, startup);
     426           0 :         case RTM_NEWQDISC:
     427             :         case RTM_DELQDISC:
     428           0 :                 return netlink_qdisc_change(h, ns_id, startup);
     429           0 :         case RTM_NEWTCLASS:
     430             :         case RTM_DELTCLASS:
     431           0 :                 return netlink_tclass_change(h, ns_id, startup);
     432           0 :         case RTM_NEWTFILTER:
     433             :         case RTM_DELTFILTER:
     434           0 :                 return netlink_tfilter_change(h, ns_id, startup);
     435             : 
     436             :         /* Messages handled in the dplane thread */
     437             :         case RTM_NEWADDR:
     438             :         case RTM_DELADDR:
     439             :         case RTM_NEWNETCONF:
     440             :         case RTM_DELNETCONF:
     441             :         case RTM_NEWTUNNEL:
     442             :         case RTM_DELTUNNEL:
     443             :         case RTM_GETTUNNEL:
     444             :                 return 0;
     445           0 :         default:
     446             :                 /*
     447             :                  * If we have received this message then
     448             :                  * we have made a mistake during development
     449             :                  * and we need to write some code to handle
     450             :                  * this message type or not ask for
     451             :                  * it to be sent up to us
     452             :                  */
     453           0 :                 flog_err(EC_ZEBRA_UNKNOWN_NLMSG,
     454             :                          "Unknown netlink nlmsg_type %s(%d) vrf %u",
     455             :                          nl_msg_type_to_str(h->nlmsg_type), h->nlmsg_type,
     456             :                          ns_id);
     457           0 :                 break;
     458             :         }
     459           0 :         return 0;
     460             : }
     461             : 
     462             : /*
     463             :  * Dispatch an incoming netlink message; used by the dataplane pthread's
     464             :  * netlink event reader code.
                        *
                        * Address messages go to netlink_interface_addr_dplane() and netconf
                        * messages to netlink_netconf_change(); their result is returned.
                        * Every other type, including RTM_NEWLINK and RTM_DELLINK which are
                        * listed but have no handler yet, is ignored and 0 is returned.
     465             :  */
     466          21 : static int dplane_netlink_information_fetch(struct nlmsghdr *h, ns_id_t ns_id,
     467             :                                             int startup)
     468             : {
     469             :         /*
     470             :          * Dispatch the incoming messages that the dplane pthread handles
     471             :          */
     472          21 :         switch (h->nlmsg_type) {
     473           2 :         case RTM_NEWADDR:
     474             :         case RTM_DELADDR:
     475           2 :                 return netlink_interface_addr_dplane(h, ns_id, startup);
     476             : 
     477          16 :         case RTM_NEWNETCONF:
     478             :         case RTM_DELNETCONF:
     479          16 :                 return netlink_netconf_change(h, ns_id, startup);
     480             : 
     481             :         /* TODO -- other messages for the dplane socket and pthread */
     482             : 
     483             :         case RTM_NEWLINK:
     484             :         case RTM_DELLINK:
     485             : 
     486             :         default:
     487             :                 break;
     488             :         }
     489             : 
     490             :         return 0;
     491             : }
     492             : 
                       /*
                        * Read event handler for a namespace's netlink socket.
                        *
                        * The zebra_ns is taken from the event argument.  Pending messages on
                        * zns->netlink are parsed with netlink_information_fetch() as the
                        * per-message callback, and the read event is then scheduled again on
                        * the same socket so the handler keeps running.
                        *
                        * NOTE(review): the literal 5 and the trailing false are passed straight
                        * to netlink_parse_info(); their meaning is not visible here -- confirm
                        * against that function's prototype.  Its return value is ignored.
                        */
     493           5 : static void kernel_read(struct thread *thread)
     494             : {
     495           5 :         struct zebra_ns *zns = (struct zebra_ns *)THREAD_ARG(thread);
     496           5 :         struct zebra_dplane_info dp_info;
     497             : 
     498             :         /* Capture key info from ns struct */
     499           5 :         zebra_dplane_info_from_zns(&dp_info, zns, false);
     500             : 
     501           5 :         netlink_parse_info(netlink_information_fetch, &zns->netlink, &dp_info,
     502             :                            5, false);
     503             : 
     504           5 :         thread_add_read(zrouter.master, kernel_read, zns, zns->netlink.sock,
     505             :                         &zns->t_netlink);
     506           5 : }
     507             : 
     508             : /*
     509             :  * Called by the dplane pthread to read incoming OS messages and dispatch them.
                        *
                        * The nlsock is looked up from info->sock and its pending messages are
                        * parsed with dplane_netlink_information_fetch() as the callback.
                        * Always returns 0; the netlink_parse_info() result is ignored.
                        *
                        * NOTE(review): the lookup result is used without a NULL check --
                        * confirm that kernel_netlink_nlsock_lookup() cannot fail for a socket
                        * that reaches this function.
     510             :  */
     511           6 : int kernel_dplane_read(struct zebra_dplane_info *info)
     512             : {
     513           6 :         struct nlsock *nl = kernel_netlink_nlsock_lookup(info->sock);
     514             : 
     515           6 :         netlink_parse_info(dplane_netlink_information_fetch, nl, info, 5,
     516             :                            false);
     517             : 
     518           6 :         return 0;
     519             : }
     520             : 
     521             : /*
     522             :  * Filter out messages from self that occur on listener socket,
     523             :  * caused by our actions on the command socket(s)
     524             :  *
     525             :  * When we add new Netlink message types we probably
     526             :  * do not need to add them here, since we are filtering
     527             :  * on the routes we actually care to receive (which is rarer
     528             :  * than the normal course of operations).  We are intentionally
     529             :  * allowing some messages from ourselves through
     530             :  * (I'm looking at you, interface-based netlink messages)
     531             :  * so that we only have to write one way to handle incoming
     532             :  * address add/delete and xxxNETCONF changes.
     533             :  */
     534           4 : static void netlink_install_filter(int sock, uint32_t pid, uint32_t dplane_pid)
     535             : {
     536             :         /*
     537             :          * BPF_JUMP instructions and where you jump to are based upon
     538             :          * 0 as being the next statement.  So count from 0.  Writing
     539             :          * this down because every time I look at this I have to
     540             :          * re-remember it.
     541             :          */
     542           4 :         struct sock_filter filter[] = {
     543             :                 /*
     544             :                  * Logic:
     545             :                  *   if (nlmsg_pid == pid ||
     546             :                  *       nlmsg_pid == dplane_pid) {
     547             :                  *       if (the incoming nlmsg_type ==
     548             :                  *           RTM_NEWADDR || RTM_DELADDR || RTM_NEWNETCONF ||
     549             :                  *           RTM_DELNETCONF)
     550             :                  *           keep this message
     551             :                  *       else
     552             :                  *           skip this message
     553             :                  *   } else
     554             :                  *       keep this netlink message
     555             :                  */
     556             :                 /*
     557             :                  * 0: Load the nlmsg_pid into the BPF register
     558             :                  */
     559             :                 BPF_STMT(BPF_LD | BPF_ABS | BPF_W,
     560             :                          offsetof(struct nlmsghdr, nlmsg_pid)),
     561             :                 /*
     562             :                  * 1: Compare to pid
     563             :                  */
     564           4 :                 BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, htonl(pid), 1, 0),
     565             :                 /*
     566             :                  * 2: Compare to dplane pid
     567             :                  */
     568           4 :                 BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, htonl(dplane_pid), 0, 6),
     569             :                 /*
     570             :                  * 3: Load the nlmsg_type into BPF register
     571             :                  */
     572             :                 BPF_STMT(BPF_LD | BPF_ABS | BPF_H,
     573             :                          offsetof(struct nlmsghdr, nlmsg_type)),
     574             :                 /*
     575             :                  * 4: Compare to RTM_NEWADDR
     576             :                  */
     577           4 :                 BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, htons(RTM_NEWADDR), 4, 0),
     578             :                 /*
     579             :                  * 5: Compare to RTM_DELADDR
     580             :                  */
     581           4 :                 BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, htons(RTM_DELADDR), 3, 0),
     582             :                 /*
     583             :                  * 6: Compare to RTM_NEWNETCONF
     584             :                  */
     585           4 :                 BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, htons(RTM_NEWNETCONF), 2,
     586             :                          0),
     587             :                 /*
     588             :                  * 7: Compare to RTM_DELNETCONF
     589             :                  */
     590           4 :                 BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, htons(RTM_DELNETCONF), 1,
     591             :                          0),
     592             :                 /*
     593             :                  * 8: This is the end state of we want to skip the
     594             :                  *    message
     595             :                  */
     596             :                 BPF_STMT(BPF_RET | BPF_K, 0),
     597             :                 /* 9: This is the end state of we want to keep
     598             :                  *     the message
     599             :                  */
     600             :                 BPF_STMT(BPF_RET | BPF_K, 0xffff),
     601             :         };
     602             : 
     603           4 :         struct sock_fprog prog = {
     604             :                 .len = array_size(filter), .filter = filter,
     605             :         };
     606             : 
     607           4 :         if (setsockopt(sock, SOL_SOCKET, SO_ATTACH_FILTER, &prog, sizeof(prog))
     608             :             < 0)
     609           0 :                 flog_err_sys(EC_LIB_SOCKET, "Can't install socket filter: %s",
     610             :                              safe_strerror(errno));
     611           4 : }
     612             : 
     613           7 : void netlink_parse_rtattr_flags(struct rtattr **tb, int max, struct rtattr *rta,
     614             :                                 int len, unsigned short flags)
     615             : {
     616           7 :         unsigned short type;
     617             : 
     618           7 :         memset(tb, 0, sizeof(struct rtattr *) * (max + 1));
     619         239 :         while (RTA_OK(rta, len)) {
     620         232 :                 type = rta->rta_type & ~flags;
     621         232 :                 if ((type <= max) && (!tb[type]))
     622         197 :                         tb[type] = rta;
     623         232 :                 rta = RTA_NEXT(rta, len);
     624             :         }
     625           7 : }
     626             : 
     627          59 : void netlink_parse_rtattr(struct rtattr **tb, int max, struct rtattr *rta,
     628             :                           int len)
     629             : {
     630          59 :         memset(tb, 0, sizeof(struct rtattr *) * (max + 1));
     631         322 :         while (RTA_OK(rta, len)) {
     632         263 :                 if (rta->rta_type <= max)
     633         259 :                         tb[rta->rta_type] = rta;
     634         263 :                 rta = RTA_NEXT(rta, len);
     635             :         }
     636          59 : }
     637             : 
     638             : /**
     639             :  * netlink_parse_rtattr_nested() - Parses a nested route attribute
     640             :  * @tb:         Pointer to array for storing rtattr in.
     641             :  * @max:        Max number to store.
     642             :  * @rta:        Pointer to rtattr to look for nested items in.
     643             :  */
     644           5 : void netlink_parse_rtattr_nested(struct rtattr **tb, int max,
     645             :                                  struct rtattr *rta)
     646             : {
     647           5 :         netlink_parse_rtattr(tb, max, RTA_DATA(rta), RTA_PAYLOAD(rta));
     648           5 : }
     649             : 
     650           0 : bool nl_addraw_l(struct nlmsghdr *n, unsigned int maxlen, const void *data,
     651             :                  unsigned int len)
     652             : {
     653           0 :         if (NLMSG_ALIGN(n->nlmsg_len) + NLMSG_ALIGN(len) > maxlen) {
     654           0 :                 zlog_err("ERROR message exceeded bound of %d", maxlen);
     655           0 :                 return false;
     656             :         }
     657             : 
     658           0 :         memcpy(NLMSG_TAIL(n), data, len);
     659           0 :         memset((uint8_t *)NLMSG_TAIL(n) + len, 0, NLMSG_ALIGN(len) - len);
     660           0 :         n->nlmsg_len = NLMSG_ALIGN(n->nlmsg_len) + NLMSG_ALIGN(len);
     661             : 
     662           0 :         return true;
     663             : }
     664             : 
     665          46 : bool nl_attr_put(struct nlmsghdr *n, unsigned int maxlen, int type,
     666             :                  const void *data, unsigned int alen)
     667             : {
     668          46 :         int len;
     669          46 :         struct rtattr *rta;
     670             : 
     671          46 :         len = RTA_LENGTH(alen);
     672             : 
     673          46 :         if (NLMSG_ALIGN(n->nlmsg_len) + RTA_ALIGN(len) > maxlen)
     674             :                 return false;
     675             : 
     676          46 :         rta = (struct rtattr *)(((char *)n) + NLMSG_ALIGN(n->nlmsg_len));
     677          46 :         rta->rta_type = type;
     678          46 :         rta->rta_len = len;
     679             : 
     680          46 :         if (data)
     681          46 :                 memcpy(RTA_DATA(rta), data, alen);
     682             :         else
     683           0 :                 assert(alen == 0);
     684             : 
     685          46 :         n->nlmsg_len = NLMSG_ALIGN(n->nlmsg_len) + RTA_ALIGN(len);
     686             : 
     687          46 :         return true;
     688             : }
     689             : 
     690           0 : bool nl_attr_put8(struct nlmsghdr *n, unsigned int maxlen, int type,
     691             :                   uint8_t data)
     692             : {
     693           0 :         return nl_attr_put(n, maxlen, type, &data, sizeof(uint8_t));
     694             : }
     695             : 
     696           0 : bool nl_attr_put16(struct nlmsghdr *n, unsigned int maxlen, int type,
     697             :                    uint16_t data)
     698             : {
     699           0 :         return nl_attr_put(n, maxlen, type, &data, sizeof(uint16_t));
     700             : }
     701             : 
     702          42 : bool nl_attr_put32(struct nlmsghdr *n, unsigned int maxlen, int type,
     703             :                    uint32_t data)
     704             : {
     705          42 :         return nl_attr_put(n, maxlen, type, &data, sizeof(uint32_t));
     706             : }
     707             : 
     708           0 : struct rtattr *nl_attr_nest(struct nlmsghdr *n, unsigned int maxlen, int type)
     709             : {
     710           0 :         struct rtattr *nest = NLMSG_TAIL(n);
     711             : 
     712           0 :         if (!nl_attr_put(n, maxlen, type, NULL, 0))
     713             :                 return NULL;
     714             : 
     715           0 :         nest->rta_type |= NLA_F_NESTED;
     716           0 :         return nest;
     717             : }
     718             : 
     719           0 : int nl_attr_nest_end(struct nlmsghdr *n, struct rtattr *nest)
     720             : {
     721           0 :         nest->rta_len = (uint8_t *)NLMSG_TAIL(n) - (uint8_t *)nest;
     722           0 :         return n->nlmsg_len;
     723             : }
     724             : 
     725           0 : struct rtnexthop *nl_attr_rtnh(struct nlmsghdr *n, unsigned int maxlen)
     726             : {
     727           0 :         struct rtnexthop *rtnh = (struct rtnexthop *)NLMSG_TAIL(n);
     728             : 
     729           0 :         if (NLMSG_ALIGN(n->nlmsg_len) + RTNH_ALIGN(sizeof(struct rtnexthop))
     730           0 :             > maxlen)
     731             :                 return NULL;
     732             : 
     733           0 :         memset(rtnh, 0, sizeof(struct rtnexthop));
     734           0 :         n->nlmsg_len =
     735           0 :                 NLMSG_ALIGN(n->nlmsg_len) + RTA_ALIGN(sizeof(struct rtnexthop));
     736             : 
     737           0 :         return rtnh;
     738             : }
     739             : 
     740           0 : void nl_attr_rtnh_end(struct nlmsghdr *n, struct rtnexthop *rtnh)
     741             : {
     742           0 :         rtnh->rtnh_len = (uint8_t *)NLMSG_TAIL(n) - (uint8_t *)rtnh;
     743           0 : }
     744             : 
     745           0 : bool nl_rta_put(struct rtattr *rta, unsigned int maxlen, int type,
     746             :                 const void *data, int alen)
     747             : {
     748           0 :         struct rtattr *subrta;
     749           0 :         int len = RTA_LENGTH(alen);
     750             : 
     751           0 :         if (RTA_ALIGN(rta->rta_len) + RTA_ALIGN(len) > maxlen) {
     752           0 :                 zlog_err("ERROR max allowed bound %d exceeded for rtattr",
     753             :                          maxlen);
     754           0 :                 return false;
     755             :         }
     756           0 :         subrta = (struct rtattr *)(((char *)rta) + RTA_ALIGN(rta->rta_len));
     757           0 :         subrta->rta_type = type;
     758           0 :         subrta->rta_len = len;
     759           0 :         if (alen)
     760           0 :                 memcpy(RTA_DATA(subrta), data, alen);
     761           0 :         rta->rta_len = NLMSG_ALIGN(rta->rta_len) + RTA_ALIGN(len);
     762             : 
     763           0 :         return true;
     764             : }
     765             : 
     766           0 : bool nl_rta_put16(struct rtattr *rta, unsigned int maxlen, int type,
     767             :                   uint16_t data)
     768             : {
     769           0 :         return nl_rta_put(rta, maxlen, type, &data, sizeof(uint16_t));
     770             : }
     771             : 
     772           0 : bool nl_rta_put64(struct rtattr *rta, unsigned int maxlen, int type,
     773             :                   uint64_t data)
     774             : {
     775           0 :         return nl_rta_put(rta, maxlen, type, &data, sizeof(uint64_t));
     776             : }
     777             : 
     778           0 : struct rtattr *nl_rta_nest(struct rtattr *rta, unsigned int maxlen, int type)
     779             : {
     780           0 :         struct rtattr *nest = RTA_TAIL(rta);
     781             : 
     782           0 :         if (nl_rta_put(rta, maxlen, type, NULL, 0))
     783             :                 return NULL;
     784             : 
     785           0 :         nest->rta_type |= NLA_F_NESTED;
     786             : 
     787           0 :         return nest;
     788             : }
     789             : 
     790           0 : int nl_rta_nest_end(struct rtattr *rta, struct rtattr *nest)
     791             : {
     792           0 :         nest->rta_len = (uint8_t *)RTA_TAIL(rta) - (uint8_t *)nest;
     793             : 
     794           0 :         return rta->rta_len;
     795             : }
     796             : 
     797           4 : const char *nl_msg_type_to_str(uint16_t msg_type)
     798             : {
     799           0 :         return lookup_msg(nlmsg_str, msg_type, "");
     800             : }
     801             : 
     802           0 : const char *nl_rtproto_to_str(uint8_t rtproto)
     803             : {
     804           0 :         return lookup_msg(rtproto_str, rtproto, "");
     805             : }
     806             : 
     807           0 : const char *nl_family_to_str(uint8_t family)
     808             : {
     809           0 :         return lookup_msg(family_str, family, "");
     810             : }
     811             : 
     812           0 : const char *nl_rttype_to_str(uint8_t rttype)
     813             : {
     814           0 :         return lookup_msg(rttype_str, rttype, "");
     815             : }
     816             : 
     817             : #define NLA_OK(nla, len)                                                       \
     818             :         ((len) >= (int)sizeof(struct nlattr)                                   \
     819             :          && (nla)->nla_len >= sizeof(struct nlattr)                            \
     820             :          && (nla)->nla_len <= (len))
     821             : #define NLA_NEXT(nla, attrlen)                                                 \
     822             :         ((attrlen) -= NLA_ALIGN((nla)->nla_len),                               \
     823             :          (struct nlattr *)(((char *)(nla)) + NLA_ALIGN((nla)->nla_len)))
     824             : #define NLA_LENGTH(len) (NLA_ALIGN(sizeof(struct nlattr)) + (len))
     825             : #define NLA_DATA(nla) ((struct nlattr *)(((char *)(nla)) + NLA_LENGTH(0)))
     826             : 
     827             : #define ERR_NLA(err, inner_len)                                                \
     828             :         ((struct nlattr *)(((char *)(err))                                     \
     829             :                            + NLMSG_ALIGN(sizeof(struct nlmsgerr))              \
     830             :                            + NLMSG_ALIGN((inner_len))))
     831             : 
     832           4 : static void netlink_parse_nlattr(struct nlattr **tb, int max,
     833             :                                  struct nlattr *nla, int len)
     834             : {
     835           8 :         while (NLA_OK(nla, len)) {
     836           4 :                 if (nla->nla_type <= max)
     837           4 :                         tb[nla->nla_type] = nla;
     838           4 :                 nla = NLA_NEXT(nla, len);
     839             :         }
     840           4 : }
     841             : 
     842           4 : static void netlink_parse_extended_ack(struct nlmsghdr *h)
     843             : {
     844           4 :         struct nlattr *tb[NLMSGERR_ATTR_MAX + 1] = {};
     845           4 :         const struct nlmsgerr *err = (const struct nlmsgerr *)NLMSG_DATA(h);
     846           4 :         const struct nlmsghdr *err_nlh = NULL;
     847             :         /* Length not including nlmsghdr */
     848           4 :         uint32_t len = 0;
     849             :         /* Inner error netlink message length */
     850           4 :         uint32_t inner_len = 0;
     851           4 :         const char *msg = NULL;
     852           4 :         uint32_t off = 0;
     853             : 
     854           4 :         if (!(h->nlmsg_flags & NLM_F_CAPPED))
     855           0 :                 inner_len = (uint32_t)NLMSG_PAYLOAD(&err->msg, 0);
     856             : 
     857           4 :         len = (uint32_t)(NLMSG_PAYLOAD(h, sizeof(struct nlmsgerr)) - inner_len);
     858             : 
     859           4 :         netlink_parse_nlattr(tb, NLMSGERR_ATTR_MAX, ERR_NLA(err, inner_len),
     860             :                              len);
     861             : 
     862           4 :         if (tb[NLMSGERR_ATTR_MSG])
     863           4 :                 msg = (const char *)NLA_DATA(tb[NLMSGERR_ATTR_MSG]);
     864             : 
     865           4 :         if (tb[NLMSGERR_ATTR_OFFS]) {
     866           0 :                 off = *(uint32_t *)NLA_DATA(tb[NLMSGERR_ATTR_OFFS]);
     867             : 
     868           0 :                 if (off > h->nlmsg_len) {
     869           0 :                         zlog_err("Invalid offset for NLMSGERR_ATTR_OFFS");
     870           0 :                 } else if (!(h->nlmsg_flags & NLM_F_CAPPED)) {
     871             :                         /*
     872             :                          * Header of failed message
     873             :                          * we are not doing anything currently with it
     874             :                          * but noticing it for later.
     875             :                          */
     876           0 :                         err_nlh = &err->msg;
     877           0 :                         zlog_debug("%s: Received %s extended Ack", __func__,
     878             :                                    nl_msg_type_to_str(err_nlh->nlmsg_type));
     879             :                 }
     880             :         }
     881             : 
     882           4 :         if (msg && *msg != '\0') {
     883           4 :                 bool is_err = !!err->error;
     884             : 
     885           4 :                 if (is_err)
     886           4 :                         zlog_err("Extended Error: %s", msg);
     887             :                 else
     888           0 :                         flog_warn(EC_ZEBRA_NETLINK_EXTENDED_WARNING,
     889             :                                   "Extended Warning: %s", msg);
     890             :         }
     891           4 : }
     892             : 
     893             : /*
     894             :  * netlink_send_msg - send a netlink message of a certain size.
     895             :  *
     896             :  * Returns -1 on error. Otherwise, it returns the number of bytes sent.
     897             :  */
     898          31 : static ssize_t netlink_send_msg(const struct nlsock *nl, void *buf,
     899             :                                 size_t buflen)
     900             : {
     901          31 :         struct sockaddr_nl snl = {};
     902          31 :         struct iovec iov = {};
     903          31 :         struct msghdr msg = {};
     904          31 :         ssize_t status;
     905          31 :         int save_errno = 0;
     906             : 
     907          31 :         iov.iov_base = buf;
     908          31 :         iov.iov_len = buflen;
     909          31 :         msg.msg_name = &snl;
     910          31 :         msg.msg_namelen = sizeof(snl);
     911          31 :         msg.msg_iov = &iov;
     912          31 :         msg.msg_iovlen = 1;
     913             : 
     914          31 :         snl.nl_family = AF_NETLINK;
     915             : 
     916             :         /* Send message to netlink interface. */
     917          62 :         frr_with_privs(&zserv_privs) {
     918          31 :                 status = sendmsg(nl->sock, &msg, 0);
     919          31 :                 save_errno = errno;
     920             :         }
     921             : 
     922          31 :         if (IS_ZEBRA_DEBUG_KERNEL_MSGDUMP_SEND) {
     923           0 :                 zlog_debug("%s: >> netlink message dump [sent]", __func__);
     924             : #ifdef NETLINK_DEBUG
     925           0 :                 nl_dump(buf, buflen);
     926             : #else
     927             :                 zlog_hexdump(buf, buflen);
     928             : #endif /* NETLINK_DEBUG */
     929             :         }
     930             : 
     931          31 :         if (status == -1) {
     932           0 :                 flog_err_sys(EC_LIB_SOCKET, "%s error: %s", __func__,
     933             :                              safe_strerror(save_errno));
     934           0 :                 return -1;
     935             :         }
     936             : 
     937             :         return status;
     938             : }
     939             : 
     940             : /*
     941             :  * netlink_recv_msg - receive a netlink message.
                     :  *
                     :  * Data is read into nl->buf, which is grown with XREALLOC first when
                     :  * the pending message is larger than nl->buflen.  msg must have its
                     :  * name fields set up by the caller; msg_iov and msg_iovlen are
                     :  * overwritten here.  msg->msg_iov is left pointing at a local
                     :  * variable, so the caller must not use it after this returns.
     942             :  *
     943             :  * Returns -1 on error, 0 if read would block or the number of bytes received.
                     :  * Any other recvmsg() failure is logged and the process exits.
     944             :  */
     945          81 : static int netlink_recv_msg(struct nlsock *nl, struct msghdr *msg)
     946             : {
     947          81 :         struct iovec iov;
     948          81 :         int status;
     949             : 
     950          81 :         iov.iov_base = nl->buf;
     951          81 :         iov.iov_len = nl->buflen;
     952          81 :         msg->msg_iov = &iov;
     953          81 :         msg->msg_iovlen = 1;
     954             : 
                               /* Retry for as long as recvmsg() is interrupted (EINTR). */
     955          81 :         do {
     956          81 :                 int bytes;
     957             : 
                                       /*
                                        * Peek with a zero-length buffer; with MSG_TRUNC the
                                        * result is used as the size of the pending message.
                                        */
     958          81 :                 bytes = recv(nl->sock, NULL, 0, MSG_PEEK | MSG_TRUNC);
     959             : 
                                       /* Grow the buffer and re-point the iovec at it. */
     960          81 :                 if (bytes >= 0 && (size_t)bytes > nl->buflen) {
     961           0 :                         nl->buf = XREALLOC(MTYPE_NL_BUF, nl->buf, bytes);
     962           0 :                         nl->buflen = bytes;
     963           0 :                         iov.iov_base = nl->buf;
     964           0 :                         iov.iov_len = nl->buflen;
     965             :                 }
     966             : 
     967          81 :                 status = recvmsg(nl->sock, msg, 0);
     968          81 :         } while (status == -1 && errno == EINTR);
     969             : 
     970          81 :         if (status == -1) {
                                       /* Nothing to read right now is not an error. */
     971          16 :                 if (errno == EWOULDBLOCK || errno == EAGAIN)
     972             :                         return 0;
     973           0 :                 flog_err(EC_ZEBRA_RECVMSG_OVERRUN, "%s recvmsg overrun: %s",
     974             :                          nl->name, safe_strerror(errno));
     975             :                 /*
     976             :                  * In this case we are screwed. There is no good way to recover
     977             :                  * zebra at this point.
     978             :                  */
     979           0 :                 exit(-1);
     980             :         }
     981             : 
                               /* A zero-byte read is treated as EOF on the socket. */
     982          65 :         if (status == 0) {
     983           0 :                 flog_err_sys(EC_LIB_SOCKET, "%s EOF", nl->name);
     984           0 :                 return -1;
     985             :         }
     986             : 
                               /* The sender address must be exactly a sockaddr_nl. */
     987          65 :         if (msg->msg_namelen != sizeof(struct sockaddr_nl)) {
     988           0 :                 flog_err(EC_ZEBRA_NETLINK_LENGTH_ERROR,
     989             :                          "%s sender address length error: length %d", nl->name,
     990             :                          msg->msg_namelen);
     991           0 :                 return -1;
     992             :         }
     993             : 
     994          65 :         if (IS_ZEBRA_DEBUG_KERNEL_MSGDUMP_RECV) {
     995           0 :                 zlog_debug("%s: << netlink message dump [recv]", __func__);
     996             : #ifdef NETLINK_DEBUG
     997           0 :                 nl_dump(nl->buf, status);
     998             : #else
     999             :                 zlog_hexdump(nl->buf, status);
    1000             : #endif /* NETLINK_DEBUG */
    1001             :         }
    1002             : 
    1003             :         return status;
    1004             : }
    1005             : 
    1006             : /*
    1007             :  * netlink_parse_error - parse a netlink error message
    1008             :  *
    1009             :  * Returns 1 if this message is acknowledgement, 0 if this error should be
    1010             :  * ignored, -1 otherwise.
    1011             :  */
    1012           4 : static int netlink_parse_error(const struct nlsock *nl, struct nlmsghdr *h,
    1013             :                                bool is_cmd, bool startup)
    1014             : {
    1015           4 :         struct nlmsgerr *err = (struct nlmsgerr *)NLMSG_DATA(h);
    1016           4 :         int errnum = err->error;
    1017           4 :         int msg_type = err->msg.nlmsg_type;
    1018             : 
    1019           4 :         if (h->nlmsg_len < NLMSG_LENGTH(sizeof(struct nlmsgerr))) {
    1020           0 :                 flog_err(EC_ZEBRA_NETLINK_LENGTH_ERROR,
    1021             :                          "%s error: message truncated", nl->name);
    1022           0 :                 return -1;
    1023             :         }
    1024             : 
    1025             :         /*
    1026             :          * Parse the extended information before we actually handle it. At this
    1027             :          * point in time we do not do anything other than report the issue.
    1028             :          */
    1029           4 :         if (h->nlmsg_flags & NLM_F_ACK_TLVS)
    1030           4 :                 netlink_parse_extended_ack(h);
    1031             : 
    1032             :         /* If the error field is zero, then this is an ACK. */
    1033           4 :         if (err->error == 0) {
    1034           0 :                 if (IS_ZEBRA_DEBUG_KERNEL) {
    1035           0 :                         zlog_debug("%s: %s ACK: type=%s(%u), seq=%u, pid=%u",
    1036             :                                    __func__, nl->name,
    1037             :                                    nl_msg_type_to_str(err->msg.nlmsg_type),
    1038             :                                    err->msg.nlmsg_type, err->msg.nlmsg_seq,
    1039             :                                    err->msg.nlmsg_pid);
    1040             :                 }
    1041             : 
    1042           0 :                 return 1;
    1043             :         }
    1044             : 
    1045             :         /*
    1046             :          * Deal with errors that occur because of races in link handling
    1047             :          * or types are not supported in kernel.
    1048             :          */
    1049           4 :         if (is_cmd &&
    1050           0 :             ((msg_type == RTM_DELROUTE &&
    1051           4 :               (-errnum == ENODEV || -errnum == ESRCH)) ||
    1052           0 :              (msg_type == RTM_NEWROUTE &&
    1053           0 :               (-errnum == ENETDOWN || -errnum == EEXIST)) ||
    1054           4 :              ((msg_type == RTM_NEWTUNNEL || msg_type == RTM_DELTUNNEL ||
    1055           0 :                msg_type == RTM_GETTUNNEL) &&
    1056             :               (-errnum == EOPNOTSUPP)))) {
    1057           0 :                 if (IS_ZEBRA_DEBUG_KERNEL)
    1058           0 :                         zlog_debug("%s: error: %s type=%s(%u), seq=%u, pid=%u",
    1059             :                                    nl->name, safe_strerror(-errnum),
    1060             :                                    nl_msg_type_to_str(msg_type), msg_type,
    1061             :                                    err->msg.nlmsg_seq, err->msg.nlmsg_pid);
    1062           0 :                 return 0;
    1063             :         }
    1064             : 
    1065             :         /*
    1066             :          * We see RTM_DELNEIGH when shutting down an interface with an IPv4
    1067             :          * link-local.  The kernel should have already deleted the neighbor so
    1068             :          * do not log these as an error.
    1069             :          */
    1070           4 :         if (msg_type == RTM_DELNEIGH
    1071           4 :             || (is_cmd && msg_type == RTM_NEWROUTE
    1072           0 :                 && (-errnum == ESRCH || -errnum == ENETUNREACH))) {
    1073             :                 /*
    1074             :                  * This is known to happen in some situations, don't log as
    1075             :                  * error.
    1076             :                  */
    1077           0 :                 if (IS_ZEBRA_DEBUG_KERNEL)
    1078           0 :                         zlog_debug("%s error: %s, type=%s(%u), seq=%u, pid=%u",
    1079             :                                    nl->name, safe_strerror(-errnum),
    1080             :                                    nl_msg_type_to_str(msg_type), msg_type,
    1081             :                                    err->msg.nlmsg_seq, err->msg.nlmsg_pid);
    1082             :         } else {
    1083           4 :                 if ((msg_type != RTM_GETNEXTHOP) || !startup)
    1084           4 :                         flog_err(EC_ZEBRA_UNEXPECTED_MESSAGE,
    1085             :                                  "%s error: %s, type=%s(%u), seq=%u, pid=%u",
    1086             :                                  nl->name, safe_strerror(-errnum),
    1087             :                                  nl_msg_type_to_str(msg_type), msg_type,
    1088             :                                  err->msg.nlmsg_seq, err->msg.nlmsg_pid);
    1089             :         }
    1090             : 
    1091             :         return -1;
    1092             : }
    1093             : 
    1094             : /*
    1095             :  * netlink_parse_info
    1096             :  *
    1097             :  * Receive messages from the netlink interface and pass that information
    1098             :  *  to the given function.
    1099             :  *
    1100             :  * filter  -> Function to call to read the results
    1101             :  * nl      -> netlink socket information
    1102             :  * zns     -> The zebra namespace data
    1103             :  * count   -> How many we should read in, 0 means as much as possible
    1104             :  * startup -> Are we reading in under startup conditions? passed to
    1105             :  *            the filter.
    1106             :  */
    1107          31 : int netlink_parse_info(int (*filter)(struct nlmsghdr *, ns_id_t, int),
    1108             :                        struct nlsock *nl, const struct zebra_dplane_info *zns,
    1109             :                        int count, bool startup)
    1110             : {
    1111          31 :         int status;
    1112          31 :         int ret = 0;
    1113          31 :         int error;
    1114          31 :         int read_in = 0;
    1115             : 
    1116          70 :         while (1) {
    1117          70 :                 struct sockaddr_nl snl;
    1118          70 :                 struct msghdr msg = {.msg_name = (void *)&snl,
    1119             :                                      .msg_namelen = sizeof(snl)};
    1120          70 :                 struct nlmsghdr *h;
    1121             : 
    1122          70 :                 if (count && read_in >= count)
    1123          24 :                         return 0;
    1124             : 
    1125          68 :                 status = netlink_recv_msg(nl, &msg);
    1126          68 :                 if (status == -1)
    1127             :                         return -1;
    1128          68 :                 else if (status == 0)
    1129             :                         break;
    1130             : 
    1131          61 :                 read_in++;
    1132          61 :                 for (h = (struct nlmsghdr *)nl->buf;
    1133         154 :                      (status >= 0 && NLMSG_OK(h, (unsigned int)status));
    1134          93 :                      h = NLMSG_NEXT(h, status)) {
    1135             :                         /* Finish of reading. */
    1136         115 :                         if (h->nlmsg_type == NLMSG_DONE)
    1137          22 :                                 return ret;
    1138             : 
    1139             :                         /* Error handling. */
    1140          93 :                         if (h->nlmsg_type == NLMSG_ERROR) {
    1141           0 :                                 int err = netlink_parse_error(
    1142           0 :                                         nl, h, zns->is_cmd, startup);
    1143             : 
    1144           0 :                                 if (err == 1) {
    1145           0 :                                         if (!(h->nlmsg_flags & NLM_F_MULTI))
    1146             :                                                 return 0;
    1147           0 :                                         continue;
    1148             :                                 } else
    1149           0 :                                         return err;
    1150             :                         }
    1151             : 
    1152             :                         /*
    1153             :                          * What is the right thing to do?  The kernel
    1154             :                          * is telling us that the dump request was interrupted
    1155             :                          * and we more than likely are out of luck and have
    1156             :                          * missed data from the kernel.  At this point in time
    1157             :                          * let's just note that this is happening.
    1158             :                          */
    1159          93 :                         if (h->nlmsg_flags & NLM_F_DUMP_INTR)
    1160           0 :                                 flog_err(
    1161             :                                         EC_ZEBRA_NETLINK_BAD_SEQUENCE,
    1162             :                                         "netlink recvmsg: The Dump request was interrupted");
    1163             : 
    1164             :                         /* OK we got netlink message. */
    1165          93 :                         if (IS_ZEBRA_DEBUG_KERNEL)
    1166           0 :                                 zlog_debug(
    1167             :                                         "%s: %s type %s(%u), len=%d, seq=%u, pid=%u",
    1168             :                                         __func__, nl->name,
    1169             :                                         nl_msg_type_to_str(h->nlmsg_type),
    1170             :                                         h->nlmsg_type, h->nlmsg_len,
    1171             :                                         h->nlmsg_seq, h->nlmsg_pid);
    1172             : 
    1173             : 
    1174             :                         /*
    1175             :                          * Ignore messages that may be sent from
    1176             :                          * other actors besides the kernel
    1177             :                          */
    1178          93 :                         if (snl.nl_pid != 0) {
    1179           0 :                                 zlog_debug("Ignoring message from pid %u",
    1180             :                                            snl.nl_pid);
    1181           0 :                                 continue;
    1182             :                         }
    1183             : 
    1184          93 :                         error = (*filter)(h, zns->ns_id, startup);
    1185          93 :                         if (error < 0) {
    1186           0 :                                 zlog_debug("%s filter function error",
    1187             :                                            nl->name);
    1188           0 :                                 ret = error;
    1189             :                         }
    1190             :                 }
    1191             : 
    1192             :                 /* After error care. */
    1193          39 :                 if (msg.msg_flags & MSG_TRUNC) {
    1194           0 :                         flog_err(EC_ZEBRA_NETLINK_LENGTH_ERROR,
    1195             :                                  "%s error: message truncated", nl->name);
    1196           0 :                         continue;
    1197             :                 }
    1198          39 :                 if (status) {
    1199           0 :                         flog_err(EC_ZEBRA_NETLINK_LENGTH_ERROR,
    1200             :                                  "%s error: data remnant size %d", nl->name,
    1201             :                                  status);
    1202           0 :                         return -1;
    1203             :                 }
    1204             :         }
    1205           7 :         return ret;
    1206             : }
    1207             : 
    1208             : /*
    1209             :  * netlink_talk_info
    1210             :  *
    1211             :  * sendmsg() to netlink socket then recvmsg().
    1212             :  * Calls netlink_parse_info to parse returned data
    1213             :  *
    1214             :  * filter   -> The filter to read final results from kernel
    1215             :  * nlmsghdr -> The data to send to the kernel
    1216             :  * dp_info -> The dataplane and netlink socket information
    1217             :  * startup  -> Are we reading in under startup conditions
    1218             :  *             This is passed through eventually to filter.
    1219             :  */
    1220           0 : static int netlink_talk_info(int (*filter)(struct nlmsghdr *, ns_id_t,
    1221             :                                            int startup),
    1222             :                              struct nlmsghdr *n,
    1223             :                              struct zebra_dplane_info *dp_info, bool startup)
    1224             : {
    1225           0 :         struct nlsock *nl;
    1226             : 
    1227           0 :         nl = kernel_netlink_nlsock_lookup(dp_info->sock);
    1228           0 :         n->nlmsg_seq = dp_info->seq;
    1229           0 :         n->nlmsg_pid = nl->snl.nl_pid;
    1230             : 
    1231           0 :         if (IS_ZEBRA_DEBUG_KERNEL)
    1232           0 :                 zlog_debug(
    1233             :                         "netlink_talk: %s type %s(%u), len=%d seq=%u flags 0x%x",
    1234             :                         nl->name, nl_msg_type_to_str(n->nlmsg_type),
    1235             :                         n->nlmsg_type, n->nlmsg_len, n->nlmsg_seq,
    1236             :                         n->nlmsg_flags);
    1237             : 
    1238           0 :         if (netlink_send_msg(nl, n, n->nlmsg_len) == -1)
    1239             :                 return -1;
    1240             : 
    1241             :         /*
    1242             :          * Get reply from netlink socket.
    1243             :          * The reply should either be an acknowledgement or an error.
    1244             :          */
    1245           0 :         return netlink_parse_info(filter, nl, dp_info, 0, startup);
    1246             : }
    1247             : 
    1248             : /*
    1249             :  * Synchronous version of netlink_talk_info. Converts args to suit the
    1250             :  * common version, which is suitable for both sync and async use.
    1251             :  */
    1252           0 : int netlink_talk(int (*filter)(struct nlmsghdr *, ns_id_t, int startup),
    1253             :                  struct nlmsghdr *n, struct nlsock *nl, struct zebra_ns *zns,
    1254             :                  bool startup)
    1255             : {
    1256           0 :         struct zebra_dplane_info dp_info;
    1257             : 
    1258             :         /* Increment sequence number before capturing snapshot of ns socket
    1259             :          * info.
    1260             :          */
    1261           0 :         nl->seq++;
    1262             : 
    1263             :         /* Capture info in intermediate info struct */
    1264           0 :         zebra_dplane_info_from_zns(&dp_info, zns, (nl == &(zns->netlink_cmd)));
    1265             : 
    1266           0 :         return netlink_talk_info(filter, n, &dp_info, startup);
    1267             : }
    1268             : 
    1269             : /* Issue request message to kernel via netlink socket. GET messages
    1270             :  * are issued through this interface.
    1271             :  */
    1272          22 : int netlink_request(struct nlsock *nl, void *req)
    1273             : {
    1274          22 :         struct nlmsghdr *n = (struct nlmsghdr *)req;
    1275             : 
    1276             :         /* Check netlink socket. */
    1277          22 :         if (nl->sock < 0) {
    1278           0 :                 flog_err_sys(EC_LIB_SOCKET, "%s socket isn't active.",
    1279             :                              nl->name);
    1280           0 :                 return -1;
    1281             :         }
    1282             : 
    1283             :         /* Fill common fields for all requests. */
    1284          22 :         n->nlmsg_pid = nl->snl.nl_pid;
    1285          22 :         n->nlmsg_seq = ++nl->seq;
    1286             : 
    1287          22 :         if (netlink_send_msg(nl, req, n->nlmsg_len) == -1)
    1288             :                 return -1;
    1289             : 
    1290             :         return 0;
    1291             : }
    1292             : 
    1293           9 : static int nl_batch_read_resp(struct nl_batch *bth)
    1294             : {
    1295           9 :         struct nlmsghdr *h;
    1296           9 :         struct sockaddr_nl snl;
    1297           9 :         struct msghdr msg = {};
    1298           9 :         int status, seq;
    1299           9 :         struct nlsock *nl;
    1300           9 :         struct zebra_dplane_ctx *ctx;
    1301           9 :         bool ignore_msg;
    1302             : 
    1303           9 :         nl = kernel_netlink_nlsock_lookup(bth->zns->sock);
    1304             : 
    1305           9 :         msg.msg_name = (void *)&snl;
    1306           9 :         msg.msg_namelen = sizeof(snl);
    1307             : 
    1308             :         /*
    1309             :          * The responses are not batched, so we need to read and process one
    1310             :          * message at a time.
    1311             :          */
    1312          13 :         while (true) {
    1313          13 :                 status = netlink_recv_msg(nl, &msg);
    1314             :                 /*
    1315             :                  * status == -1 is a full on failure somewhere
    1316             :                  * since we don't know where the problem happened
    1317             :                  * we must mark all as failed
    1318             :                  *
    1319             :                  * Else we mark everything as worked
    1320             :                  *
    1321             :                  */
    1322          13 :                 if (status == -1 || status == 0) {
    1323          47 :                         while ((ctx = dplane_ctx_dequeue(&(bth->ctx_list))) !=
    1324             :                                NULL) {
    1325          38 :                                 if (status == -1)
    1326           0 :                                         dplane_ctx_set_status(
    1327             :                                                 ctx,
    1328             :                                                 ZEBRA_DPLANE_REQUEST_FAILURE);
    1329          38 :                                 dplane_ctx_enqueue_tail(bth->ctx_out_q, ctx);
    1330             :                         }
    1331           9 :                         return status;
    1332             :                 }
    1333             : 
    1334           4 :                 h = (struct nlmsghdr *)nl->buf;
    1335           4 :                 ignore_msg = false;
    1336           4 :                 seq = h->nlmsg_seq;
    1337             :                 /*
    1338             :                  * Find the corresponding context object. Received responses are
    1339             :                  * in the same order as requests we sent, so we can simply
    1340             :                  * iterate over the context list and match responses with
    1341             :                  * requests at same time.
    1342             :                  */
    1343          11 :                 while (true) {
    1344          11 :                         ctx = dplane_ctx_get_head(&(bth->ctx_list));
    1345          11 :                         if (ctx == NULL) {
    1346             :                                 /*
    1347             :                                  * This is a situation where we have gotten
    1348             :                                  * into a bad spot.  We need to know that
    1349             :                                  * this happens( does it? )
    1350             :                                  */
    1351           0 :                                 zlog_err(
    1352             :                                         "%s:WARNING Received netlink Response for an error and no Contexts to associate with it",
    1353             :                                         __func__);
    1354           0 :                                 break;
    1355             :                         }
    1356             : 
    1357             :                         /*
    1358             :                          * 'update' context objects take two consecutive
    1359             :                          * sequence numbers.
    1360             :                          */
    1361          11 :                         if (dplane_ctx_is_update(ctx) &&
    1362           0 :                             dplane_ctx_get_ns(ctx)->seq + 1 == seq) {
    1363             :                                 /*
    1364             :                                  * This is the situation where we get a response
    1365             :                                  * to a message that should be ignored.
    1366             :                                  */
    1367             :                                 ignore_msg = true;
    1368             :                                 break;
    1369             :                         }
    1370             : 
    1371          11 :                         ctx = dplane_ctx_dequeue(&(bth->ctx_list));
    1372          11 :                         dplane_ctx_enqueue_tail(bth->ctx_out_q, ctx);
    1373             : 
    1374             :                         /* We have found corresponding context object. */
    1375          11 :                         if (dplane_ctx_get_ns(ctx)->seq == seq)
    1376             :                                 break;
    1377             : 
    1378           7 :                         if (dplane_ctx_get_ns(ctx)->seq > seq)
    1379           0 :                                 zlog_warn(
    1380             :                                         "%s:WARNING Received %u is less than any context on the queue ctx->seq %u",
    1381             :                                         __func__, seq,
    1382             :                                         dplane_ctx_get_ns(ctx)->seq);
    1383             :                 }
    1384             : 
    1385           4 :                 if (ignore_msg) {
    1386             :                         /*
    1387             :                          * If we ignore the message due to an update
    1388             :                          * above we should still fricking decode the
    1389             :                          * message for our operator to understand
    1390             :                          * what is going on
    1391             :                          */
    1392           0 :                         int err = netlink_parse_error(nl, h, bth->zns->is_cmd,
    1393             :                                                       false);
    1394             : 
    1395           0 :                         zlog_debug("%s: netlink error message seq=%d %d",
    1396             :                                    __func__, h->nlmsg_seq, err);
    1397           0 :                         continue;
    1398             :                 }
    1399             : 
    1400             :                 /*
    1401             :                  * We received a message with the sequence number that isn't
    1402             :                  * associated with any dplane context object.
    1403             :                  */
    1404           4 :                 if (ctx == NULL) {
    1405           0 :                         if (IS_ZEBRA_DEBUG_KERNEL)
    1406           0 :                                 zlog_debug(
    1407             :                                         "%s: skipping unassociated response, seq number %d NS %u",
    1408             :                                         __func__, h->nlmsg_seq,
    1409             :                                         bth->zns->ns_id);
    1410           0 :                         continue;
    1411             :                 }
    1412             : 
    1413           4 :                 if (h->nlmsg_type == NLMSG_ERROR) {
    1414           4 :                         int err = netlink_parse_error(nl, h, bth->zns->is_cmd,
    1415             :                                                       false);
    1416             : 
    1417           4 :                         if (err == -1)
    1418           4 :                                 dplane_ctx_set_status(
    1419             :                                         ctx, ZEBRA_DPLANE_REQUEST_FAILURE);
    1420             : 
    1421           4 :                         if (IS_ZEBRA_DEBUG_KERNEL)
    1422           0 :                                 zlog_debug("%s: netlink error message seq=%d ",
    1423             :                                            __func__, h->nlmsg_seq);
    1424           4 :                         continue;
    1425             :                 }
    1426             : 
    1427             :                 /*
    1428             :                  * If we get here then we received neither the ack nor
    1429             :                  * the error and instead received some other message in an
    1430             :                  * unexpected way.
    1431             :                  */
    1432           0 :                 if (IS_ZEBRA_DEBUG_KERNEL)
    1433           0 :                         zlog_debug("%s: ignoring message type 0x%04x(%s) NS %u",
    1434             :                                    __func__, h->nlmsg_type,
    1435             :                                    nl_msg_type_to_str(h->nlmsg_type),
    1436             :                                    bth->zns->ns_id);
    1437             :         }
    1438             : 
    1439             :         return 0;
    1440             : }
    1441             : 
    1442          24 : static void nl_batch_reset(struct nl_batch *bth)
    1443             : {
    1444          24 :         bth->buf_head = bth->buf;
    1445          24 :         bth->curlen = 0;
    1446          24 :         bth->msgcnt = 0;
    1447          24 :         bth->zns = NULL;
    1448             : 
    1449          24 :         dplane_ctx_q_init(&(bth->ctx_list));
    1450             : }
    1451             : 
    1452          12 : static void nl_batch_init(struct nl_batch *bth,
    1453             :                           struct dplane_ctx_list_head *ctx_out_q)
    1454             : {
    1455             :         /*
    1456             :          * If the size of the buffer has changed, free and then allocate a new
    1457             :          * one.
    1458             :          */
    1459          12 :         size_t bufsize =
    1460          12 :                 atomic_load_explicit(&nl_batch_bufsize, memory_order_relaxed);
    1461          12 :         if (bufsize != nl_batch_tx_bufsize) {
    1462           2 :                 if (nl_batch_tx_buf)
    1463           0 :                         XFREE(MTYPE_NL_BUF, nl_batch_tx_buf);
    1464             : 
    1465           2 :                 nl_batch_tx_buf = XCALLOC(MTYPE_NL_BUF, bufsize);
    1466           2 :                 nl_batch_tx_bufsize = bufsize;
    1467             :         }
    1468             : 
    1469          12 :         bth->buf = nl_batch_tx_buf;
    1470          12 :         bth->bufsiz = bufsize;
    1471          12 :         bth->limit = atomic_load_explicit(&nl_batch_send_threshold,
    1472             :                                           memory_order_relaxed);
    1473             : 
    1474          12 :         bth->ctx_out_q = ctx_out_q;
    1475             : 
    1476          12 :         nl_batch_reset(bth);
    1477          12 : }
    1478             : 
    1479          12 : static void nl_batch_send(struct nl_batch *bth)
    1480             : {
    1481          12 :         struct zebra_dplane_ctx *ctx;
    1482          12 :         bool err = false;
    1483             : 
    1484          12 :         if (bth->curlen != 0 && bth->zns != NULL) {
    1485           9 :                 struct nlsock *nl =
    1486           9 :                         kernel_netlink_nlsock_lookup(bth->zns->sock);
    1487             : 
    1488           9 :                 if (IS_ZEBRA_DEBUG_KERNEL)
    1489           0 :                         zlog_debug("%s: %s, batch size=%zu, msg cnt=%zu",
    1490             :                                    __func__, nl->name, bth->curlen,
    1491             :                                    bth->msgcnt);
    1492             : 
    1493           9 :                 if (netlink_send_msg(nl, bth->buf, bth->curlen) == -1)
    1494             :                         err = true;
    1495             : 
    1496           9 :                 if (!err) {
    1497           9 :                         if (nl_batch_read_resp(bth) == -1)
    1498           0 :                                 err = true;
    1499             :                 }
    1500             :         }
    1501             : 
    1502             :         /* Move remaining contexts to the outbound queue. */
    1503          16 :         while (true) {
    1504          14 :                 ctx = dplane_ctx_dequeue(&(bth->ctx_list));
    1505          14 :                 if (ctx == NULL)
    1506             :                         break;
    1507             : 
    1508           2 :                 if (err)
    1509           0 :                         dplane_ctx_set_status(ctx,
    1510             :                                               ZEBRA_DPLANE_REQUEST_FAILURE);
    1511             : 
    1512           2 :                 dplane_ctx_enqueue_tail(bth->ctx_out_q, ctx);
    1513             :         }
    1514             : 
    1515          12 :         nl_batch_reset(bth);
    1516          12 : }
    1517             : 
    1518          25 : enum netlink_msg_status netlink_batch_add_msg(
    1519             :         struct nl_batch *bth, struct zebra_dplane_ctx *ctx,
    1520             :         ssize_t (*msg_encoder)(struct zebra_dplane_ctx *, void *, size_t),
    1521             :         bool ignore_res)
    1522             : {
    1523          25 :         int seq;
    1524          25 :         ssize_t size;
    1525          25 :         struct nlmsghdr *msgh;
    1526          25 :         struct nlsock *nl;
    1527             : 
    1528          25 :         size = (*msg_encoder)(ctx, bth->buf_head, bth->bufsiz - bth->curlen);
    1529             : 
    1530             :         /*
    1531             :          * If there was an error while encoding the message (other than buffer
    1532             :          * overflow) then return an error.
    1533             :          */
    1534          25 :         if (size < 0)
    1535             :                 return FRR_NETLINK_ERROR;
    1536             : 
    1537             :         /*
    1538             :          * If the message doesn't fit entirely in the buffer then send the batch
    1539             :          * and retry.
    1540             :          */
    1541          25 :         if (size == 0) {
    1542           0 :                 nl_batch_send(bth);
    1543           0 :                 size = (*msg_encoder)(ctx, bth->buf_head,
    1544           0 :                                       bth->bufsiz - bth->curlen);
    1545             :                 /*
    1546             :                  * If the message doesn't fit in the empty buffer then just
    1547             :                  * return an error.
    1548             :                  */
    1549           0 :                 if (size <= 0)
    1550             :                         return FRR_NETLINK_ERROR;
    1551             :         }
    1552             : 
    1553          25 :         seq = dplane_ctx_get_ns(ctx)->seq;
    1554          25 :         nl = kernel_netlink_nlsock_lookup(dplane_ctx_get_ns_sock(ctx));
    1555             : 
    1556          25 :         if (ignore_res)
    1557           0 :                 seq++;
    1558             : 
    1559          25 :         msgh = (struct nlmsghdr *)bth->buf_head;
    1560          25 :         msgh->nlmsg_seq = seq;
    1561          25 :         msgh->nlmsg_pid = nl->snl.nl_pid;
    1562             : 
    1563          25 :         bth->zns = dplane_ctx_get_ns(ctx);
    1564          25 :         bth->buf_head = ((char *)bth->buf_head) + size;
    1565          25 :         bth->curlen += size;
    1566          25 :         bth->msgcnt++;
    1567             : 
    1568          25 :         return FRR_NETLINK_QUEUED;
    1569             : }
    1570             : 
    1571          51 : static enum netlink_msg_status nl_put_msg(struct nl_batch *bth,
    1572             :                                           struct zebra_dplane_ctx *ctx)
    1573             : {
    1574          51 :         if (dplane_ctx_is_skip_kernel(ctx))
    1575             :                 return FRR_NETLINK_SUCCESS;
    1576             : 
    1577          51 :         switch (dplane_ctx_get_op(ctx)) {
    1578             : 
    1579          29 :         case DPLANE_OP_ROUTE_INSTALL:
    1580             :         case DPLANE_OP_ROUTE_UPDATE:
    1581             :         case DPLANE_OP_ROUTE_DELETE:
    1582          29 :                 return netlink_put_route_update_msg(bth, ctx);
    1583             : 
    1584          22 :         case DPLANE_OP_NH_INSTALL:
    1585             :         case DPLANE_OP_NH_UPDATE:
    1586             :         case DPLANE_OP_NH_DELETE:
    1587          22 :                 return netlink_put_nexthop_update_msg(bth, ctx);
    1588             : 
    1589           0 :         case DPLANE_OP_LSP_INSTALL:
    1590             :         case DPLANE_OP_LSP_UPDATE:
    1591             :         case DPLANE_OP_LSP_DELETE:
    1592           0 :                 return netlink_put_lsp_update_msg(bth, ctx);
    1593             : 
    1594           0 :         case DPLANE_OP_PW_INSTALL:
    1595             :         case DPLANE_OP_PW_UNINSTALL:
    1596           0 :                 return netlink_put_pw_update_msg(bth, ctx);
    1597             : 
    1598           0 :         case DPLANE_OP_ADDR_INSTALL:
    1599             :         case DPLANE_OP_ADDR_UNINSTALL:
    1600           0 :                 return netlink_put_address_update_msg(bth, ctx);
    1601             : 
    1602           0 :         case DPLANE_OP_MAC_INSTALL:
    1603             :         case DPLANE_OP_MAC_DELETE:
    1604           0 :                 return netlink_put_mac_update_msg(bth, ctx);
    1605             : 
    1606           0 :         case DPLANE_OP_NEIGH_INSTALL:
    1607             :         case DPLANE_OP_NEIGH_UPDATE:
    1608             :         case DPLANE_OP_NEIGH_DELETE:
    1609             :         case DPLANE_OP_VTEP_ADD:
    1610             :         case DPLANE_OP_VTEP_DELETE:
    1611             :         case DPLANE_OP_NEIGH_DISCOVER:
    1612             :         case DPLANE_OP_NEIGH_IP_INSTALL:
    1613             :         case DPLANE_OP_NEIGH_IP_DELETE:
    1614             :         case DPLANE_OP_NEIGH_TABLE_UPDATE:
    1615           0 :                 return netlink_put_neigh_update_msg(bth, ctx);
    1616             : 
    1617           0 :         case DPLANE_OP_RULE_ADD:
    1618             :         case DPLANE_OP_RULE_DELETE:
    1619             :         case DPLANE_OP_RULE_UPDATE:
    1620           0 :                 return netlink_put_rule_update_msg(bth, ctx);
    1621             : 
    1622             :         case DPLANE_OP_SYS_ROUTE_ADD:
    1623             :         case DPLANE_OP_SYS_ROUTE_DELETE:
    1624             :         case DPLANE_OP_ROUTE_NOTIFY:
    1625             :         case DPLANE_OP_LSP_NOTIFY:
    1626             :         case DPLANE_OP_BR_PORT_UPDATE:
    1627             :                 return FRR_NETLINK_SUCCESS;
    1628             : 
    1629             :         case DPLANE_OP_IPTABLE_ADD:
    1630             :         case DPLANE_OP_IPTABLE_DELETE:
    1631             :         case DPLANE_OP_IPSET_ADD:
    1632             :         case DPLANE_OP_IPSET_DELETE:
    1633             :         case DPLANE_OP_IPSET_ENTRY_ADD:
    1634             :         case DPLANE_OP_IPSET_ENTRY_DELETE:
    1635             :                 return FRR_NETLINK_ERROR;
    1636             : 
    1637           0 :         case DPLANE_OP_GRE_SET:
    1638           0 :                 return netlink_put_gre_set_msg(bth, ctx);
    1639             : 
    1640             :         case DPLANE_OP_INTF_ADDR_ADD:
    1641             :         case DPLANE_OP_INTF_ADDR_DEL:
    1642             :         case DPLANE_OP_NONE:
    1643             :                 return FRR_NETLINK_ERROR;
    1644             : 
    1645           0 :         case DPLANE_OP_INTF_NETCONFIG:
    1646           0 :                 return netlink_put_intf_netconfig(bth, ctx);
    1647             : 
    1648           0 :         case DPLANE_OP_INTF_INSTALL:
    1649             :         case DPLANE_OP_INTF_UPDATE:
    1650             :         case DPLANE_OP_INTF_DELETE:
    1651           0 :                 return netlink_put_intf_update_msg(bth, ctx);
    1652             : 
    1653           0 :         case DPLANE_OP_TC_QDISC_INSTALL:
    1654             :         case DPLANE_OP_TC_QDISC_UNINSTALL:
    1655           0 :                 return netlink_put_tc_qdisc_update_msg(bth, ctx);
    1656           0 :         case DPLANE_OP_TC_CLASS_ADD:
    1657             :         case DPLANE_OP_TC_CLASS_DELETE:
    1658             :         case DPLANE_OP_TC_CLASS_UPDATE:
    1659           0 :                 return netlink_put_tc_class_update_msg(bth, ctx);
    1660           0 :         case DPLANE_OP_TC_FILTER_ADD:
    1661             :         case DPLANE_OP_TC_FILTER_DELETE:
    1662             :         case DPLANE_OP_TC_FILTER_UPDATE:
    1663           0 :                 return netlink_put_tc_filter_update_msg(bth, ctx);
    1664             :         }
    1665             : 
    1666             :         return FRR_NETLINK_ERROR;
    1667             : }
    1668             : 
    1669          12 : void kernel_update_multi(struct dplane_ctx_list_head *ctx_list)
    1670             : {
    1671          12 :         struct nl_batch batch;
    1672          12 :         struct zebra_dplane_ctx *ctx;
    1673          12 :         struct dplane_ctx_list_head handled_list;
    1674          12 :         enum netlink_msg_status res;
    1675             : 
    1676          12 :         dplane_ctx_q_init(&handled_list);
    1677          12 :         nl_batch_init(&batch, &handled_list);
    1678             : 
    1679          63 :         while (true) {
    1680          63 :                 ctx = dplane_ctx_dequeue(ctx_list);
    1681          63 :                 if (ctx == NULL)
    1682             :                         break;
    1683             : 
    1684          51 :                 if (batch.zns != NULL
    1685          33 :                     && batch.zns->ns_id != dplane_ctx_get_ns(ctx)->ns_id)
    1686           0 :                         nl_batch_send(&batch);
    1687             : 
    1688             :                 /*
    1689             :                  * Assume all messages will succeed and then mark only the ones
    1690             :                  * that failed.
    1691             :                  */
    1692          51 :                 dplane_ctx_set_status(ctx, ZEBRA_DPLANE_REQUEST_SUCCESS);
    1693             : 
    1694          51 :                 res = nl_put_msg(&batch, ctx);
    1695             : 
    1696          51 :                 dplane_ctx_enqueue_tail(&(batch.ctx_list), ctx);
    1697          51 :                 if (res == FRR_NETLINK_ERROR)
    1698           0 :                         dplane_ctx_set_status(ctx,
    1699             :                                               ZEBRA_DPLANE_REQUEST_FAILURE);
    1700             : 
    1701          51 :                 if (batch.curlen > batch.limit)
    1702           0 :                         nl_batch_send(&batch);
    1703             :         }
    1704             : 
    1705          12 :         nl_batch_send(&batch);
    1706             : 
    1707          12 :         dplane_ctx_q_init(ctx_list);
    1708          12 :         dplane_ctx_list_append(ctx_list, &handled_list);
    1709          12 : }
    1710             : 
    1711          76 : struct nlsock *kernel_netlink_nlsock_lookup(int sock)
    1712             : {
    1713          76 :         struct nlsock lookup, *retval;
    1714             : 
    1715          76 :         lookup.sock = sock;
    1716             : 
    1717          76 :         NLSOCK_LOCK();
    1718          76 :         retval = hash_lookup(nlsock_hash, &lookup);
    1719          76 :         NLSOCK_UNLOCK();
    1720             : 
    1721          76 :         return retval;
    1722             : }
    1723             : 
    1724             : /* Insert nlsock entry into hash */
    1725           8 : static void kernel_netlink_nlsock_insert(struct nlsock *nls)
    1726             : {
    1727           8 :         NLSOCK_LOCK();
    1728           8 :         (void)hash_get(nlsock_hash, nls, hash_alloc_intern);
    1729           8 :         NLSOCK_UNLOCK();
    1730           8 : }
    1731             : 
    1732             : /* Remove nlsock entry from hash */
    1733           8 : static void kernel_netlink_nlsock_remove(struct nlsock *nls)
    1734             : {
    1735           8 :         NLSOCK_LOCK();
    1736           8 :         (void)hash_release(nlsock_hash, nls);
    1737           8 :         NLSOCK_UNLOCK();
    1738           8 : }
    1739             : 
    1740          92 : static uint32_t kernel_netlink_nlsock_key(const void *arg)
    1741             : {
    1742          92 :         const struct nlsock *nl = arg;
    1743             : 
    1744          92 :         return nl->sock;
    1745             : }
    1746             : 
    1747          84 : static bool kernel_netlink_nlsock_hash_equal(const void *arg1, const void *arg2)
    1748             : {
    1749          84 :         const struct nlsock *nl1 = arg1;
    1750          84 :         const struct nlsock *nl2 = arg2;
    1751             : 
    1752          84 :         if (nl1->sock == nl2->sock)
    1753          84 :                 return true;
    1754             : 
    1755             :         return false;
    1756             : }
    1757             : 
    1758             : /* Exported interface function.  This function simply calls
    1759             :    netlink_socket (). */
    1760           2 : void kernel_init(struct zebra_ns *zns)
    1761             : {
    1762           2 :         uint32_t groups, dplane_groups, ext_groups;
    1763             : #if defined SOL_NETLINK
    1764           2 :         int one, ret;
    1765             : #endif
    1766             : 
    1767             :         /*
    1768             :          * Initialize netlink sockets
    1769             :          *
    1770             :          * If RTMGRP_XXX exists use that, but at some point
    1771             :          * I think the kernel developers realized that
    1772             :          * keeping track of all the different values would
    1773             :          * lead to confusion, so we need to convert the
    1774             :          * RTNLGRP_XXX to a bit position for ourself
    1775             :          */
    1776           2 :         groups = RTMGRP_LINK                   |
    1777             :                         RTMGRP_IPV4_ROUTE              |
    1778             :                         RTMGRP_IPV4_IFADDR             |
    1779             :                         RTMGRP_IPV6_ROUTE              |
    1780             :                         RTMGRP_IPV6_IFADDR             |
    1781             :                         RTMGRP_IPV4_MROUTE             |
    1782             :                         RTMGRP_NEIGH                   |
    1783             :                         ((uint32_t) 1 << (RTNLGRP_IPV4_RULE - 1)) |
    1784             :                         ((uint32_t) 1 << (RTNLGRP_IPV6_RULE - 1)) |
    1785             :                         ((uint32_t) 1 << (RTNLGRP_NEXTHOP - 1))   |
    1786             :                         ((uint32_t) 1 << (RTNLGRP_TC - 1));
    1787             : 
    1788           2 :         dplane_groups = (RTMGRP_LINK            |
    1789             :                          RTMGRP_IPV4_IFADDR     |
    1790             :                          RTMGRP_IPV6_IFADDR     |
    1791             :                          ((uint32_t) 1 << (RTNLGRP_IPV4_NETCONF - 1)) |
    1792             :                          ((uint32_t) 1 << (RTNLGRP_IPV6_NETCONF - 1)) |
    1793             :                          ((uint32_t) 1 << (RTNLGRP_MPLS_NETCONF - 1)));
    1794             : 
    1795             :         /* Use setsockopt for > 31 group */
    1796           2 :         ext_groups = RTNLGRP_TUNNEL;
    1797             : 
    1798           2 :         snprintf(zns->netlink.name, sizeof(zns->netlink.name),
    1799             :                  "netlink-listen (NS %u)", zns->ns_id);
    1800           2 :         zns->netlink.sock = -1;
    1801           2 :         if (netlink_socket(&zns->netlink, groups, &ext_groups, 1, zns->ns_id) <
    1802             :             0) {
    1803           0 :                 zlog_err("Failure to create %s socket",
    1804             :                          zns->netlink.name);
    1805           0 :                 exit(-1);
    1806             :         }
    1807             : 
    1808           2 :         kernel_netlink_nlsock_insert(&zns->netlink);
    1809             : 
    1810           2 :         snprintf(zns->netlink_cmd.name, sizeof(zns->netlink_cmd.name),
    1811             :                  "netlink-cmd (NS %u)", zns->ns_id);
    1812           2 :         zns->netlink_cmd.sock = -1;
    1813           2 :         if (netlink_socket(&zns->netlink_cmd, 0, 0, 0, zns->ns_id) < 0) {
    1814           0 :                 zlog_err("Failure to create %s socket",
    1815             :                          zns->netlink_cmd.name);
    1816           0 :                 exit(-1);
    1817             :         }
    1818             : 
    1819           2 :         kernel_netlink_nlsock_insert(&zns->netlink_cmd);
    1820             : 
    1821             :         /* Outbound socket for dplane programming of the host OS. */
    1822           2 :         snprintf(zns->netlink_dplane_out.name,
    1823             :                  sizeof(zns->netlink_dplane_out.name), "netlink-dp (NS %u)",
    1824             :                  zns->ns_id);
    1825           2 :         zns->netlink_dplane_out.sock = -1;
    1826           2 :         if (netlink_socket(&zns->netlink_dplane_out, 0, 0, 0, zns->ns_id) < 0) {
    1827           0 :                 zlog_err("Failure to create %s socket",
    1828             :                          zns->netlink_dplane_out.name);
    1829           0 :                 exit(-1);
    1830             :         }
    1831             : 
    1832           2 :         kernel_netlink_nlsock_insert(&zns->netlink_dplane_out);
    1833             : 
    1834             :         /* Inbound socket for OS events coming to the dplane. */
    1835           2 :         snprintf(zns->netlink_dplane_in.name,
    1836             :                  sizeof(zns->netlink_dplane_in.name), "netlink-dp-in (NS %u)",
    1837             :                  zns->ns_id);
    1838           2 :         zns->netlink_dplane_in.sock = -1;
    1839           2 :         if (netlink_socket(&zns->netlink_dplane_in, dplane_groups, 0, 0,
    1840             :                            zns->ns_id) < 0) {
    1841           0 :                 zlog_err("Failure to create %s socket",
    1842             :                          zns->netlink_dplane_in.name);
    1843           0 :                 exit(-1);
    1844             :         }
    1845             : 
    1846           2 :         kernel_netlink_nlsock_insert(&zns->netlink_dplane_in);
    1847             : 
    1848             :         /*
    1849             :          * SOL_NETLINK is not available on all platforms yet
    1850             :          * apparently.  It's in bits/socket.h which I am not
    1851             :          * sure that we want to pull into our build system.
    1852             :          */
    1853             : #if defined SOL_NETLINK
    1854             :         /*
    1855             :          * Let's tell the kernel that we want to receive extended
    1856             :          * ACKS over our command socket(s)
    1857             :          */
    1858           2 :         one = 1;
    1859           2 :         ret = setsockopt(zns->netlink_cmd.sock, SOL_NETLINK, NETLINK_EXT_ACK,
    1860             :                          &one, sizeof(one));
    1861             : 
    1862           2 :         if (ret < 0)
    1863           0 :                 zlog_notice("Registration for extended cmd ACK failed : %d %s",
    1864             :                             errno, safe_strerror(errno));
    1865             : 
    1866           2 :         one = 1;
    1867           2 :         ret = setsockopt(zns->netlink_dplane_out.sock, SOL_NETLINK,
    1868             :                          NETLINK_EXT_ACK, &one, sizeof(one));
    1869             : 
    1870           2 :         if (ret < 0)
    1871           0 :                 zlog_notice("Registration for extended dp ACK failed : %d %s",
    1872             :                             errno, safe_strerror(errno));
    1873             : 
    1874             :         /*
    1875             :          * Trim off the payload of the original netlink message in the
    1876             :          * acknowledgment. This option is available since Linux 4.2, so if
    1877             :          * setsockopt fails, ignore the error.
    1878             :          */
    1879           2 :         one = 1;
    1880           2 :         ret = setsockopt(zns->netlink_dplane_out.sock, SOL_NETLINK,
    1881             :                          NETLINK_CAP_ACK, &one, sizeof(one));
    1882           2 :         if (ret < 0)
    1883           0 :                 zlog_notice(
    1884             :                         "Registration for reduced ACK packet size failed, probably running an early kernel");
    1885             : #endif
    1886             : 
    1887             :         /* Register kernel socket. */
    1888           2 :         if (fcntl(zns->netlink.sock, F_SETFL, O_NONBLOCK) < 0)
    1889           0 :                 flog_err_sys(EC_LIB_SOCKET, "Can't set %s socket flags: %s",
    1890             :                              zns->netlink.name, safe_strerror(errno));
    1891             : 
    1892           2 :         if (fcntl(zns->netlink_cmd.sock, F_SETFL, O_NONBLOCK) < 0)
    1893           0 :                 zlog_err("Can't set %s socket error: %s(%d)",
    1894             :                          zns->netlink_cmd.name, safe_strerror(errno), errno);
    1895             : 
    1896           2 :         if (fcntl(zns->netlink_dplane_out.sock, F_SETFL, O_NONBLOCK) < 0)
    1897           0 :                 zlog_err("Can't set %s socket error: %s(%d)",
    1898             :                          zns->netlink_dplane_out.name, safe_strerror(errno),
    1899             :                          errno);
    1900             : 
    1901           2 :         if (fcntl(zns->netlink_dplane_in.sock, F_SETFL, O_NONBLOCK) < 0)
    1902           0 :                 zlog_err("Can't set %s socket error: %s(%d)",
    1903             :                          zns->netlink_dplane_in.name, safe_strerror(errno),
    1904             :                          errno);
    1905             : 
    1906             :         /* Set receive buffer size if it's set from command line */
    1907           2 :         if (rcvbufsize) {
    1908           2 :                 netlink_recvbuf(&zns->netlink, rcvbufsize);
    1909           2 :                 netlink_recvbuf(&zns->netlink_cmd, rcvbufsize);
    1910           2 :                 netlink_recvbuf(&zns->netlink_dplane_out, rcvbufsize);
    1911           2 :                 netlink_recvbuf(&zns->netlink_dplane_in, rcvbufsize);
    1912             :         }
    1913             : 
    1914             :         /* Set filter for inbound sockets, to exclude events we've generated
    1915             :          * ourselves.
    1916             :          */
    1917           2 :         netlink_install_filter(zns->netlink.sock, zns->netlink_cmd.snl.nl_pid,
    1918             :                                zns->netlink_dplane_out.snl.nl_pid);
    1919             : 
    1920           2 :         netlink_install_filter(zns->netlink_dplane_in.sock,
    1921             :                                zns->netlink_cmd.snl.nl_pid,
    1922             :                                zns->netlink_dplane_out.snl.nl_pid);
    1923             : 
    1924           2 :         zns->t_netlink = NULL;
    1925             : 
    1926           2 :         thread_add_read(zrouter.master, kernel_read, zns,
    1927             :                         zns->netlink.sock, &zns->t_netlink);
    1928             : 
    1929           2 :         rt_netlink_init();
    1930           2 : }
    1931             : 
    1932             : /* Helper to clean up an nlsock */
    1933          14 : static void kernel_nlsock_fini(struct nlsock *nls)
    1934             : {
    1935          14 :         if (nls && nls->sock >= 0) {
    1936           8 :                 kernel_netlink_nlsock_remove(nls);
    1937           8 :                 close(nls->sock);
    1938           8 :                 nls->sock = -1;
    1939           8 :                 XFREE(MTYPE_NL_BUF, nls->buf);
    1940           8 :                 nls->buflen = 0;
    1941             :         }
    1942          14 : }
    1943             : 
    1944           4 : void kernel_terminate(struct zebra_ns *zns, bool complete)
    1945             : {
    1946           4 :         THREAD_OFF(zns->t_netlink);
    1947             : 
    1948           4 :         kernel_nlsock_fini(&zns->netlink);
    1949             : 
    1950           4 :         kernel_nlsock_fini(&zns->netlink_cmd);
    1951             : 
    1952           4 :         kernel_nlsock_fini(&zns->netlink_dplane_in);
    1953             : 
    1954             :         /* During zebra shutdown, we need to leave the dataplane socket
    1955             :          * around until all work is done.
    1956             :          */
    1957           4 :         if (complete)
    1958           2 :                 kernel_nlsock_fini(&zns->netlink_dplane_out);
    1959           4 : }
    1960             : 
    1961             : /*
    1962             :  * Global init for platform-/OS-specific things
    1963             :  */
    1964           2 : void kernel_router_init(void)
    1965             : {
    1966             :         /* Init nlsock hash and lock */
    1967           2 :         pthread_mutex_init(&nlsock_mutex, NULL);
    1968           2 :         nlsock_hash = hash_create_size(8, kernel_netlink_nlsock_key,
    1969             :                                        kernel_netlink_nlsock_hash_equal,
    1970             :                                        "Netlink Socket Hash");
    1971           2 : }
    1972             : 
    1973             : /*
    1974             :  * Global deinit for platform-/OS-specific things
    1975             :  */
    1976           2 : void kernel_router_terminate(void)
    1977             : {
    1978           2 :         pthread_mutex_destroy(&nlsock_mutex);
    1979             : 
    1980           2 :         hash_free(nlsock_hash);
    1981           2 :         nlsock_hash = NULL;
    1982           2 : }
    1983             : 
    1984             : #endif /* HAVE_NETLINK */

Generated by: LCOV version v1.16-topotato