Line data Source code
1 : /*
2 : * Zebra Policy Based Routing (PBR) interaction with the kernel using
3 : * netlink.
4 : * Copyright (C) 2018 Cumulus Networks, Inc.
5 : *
6 : * This file is part of FRR.
7 : *
8 : * FRR is free software; you can redistribute it and/or modify it
9 : * under the terms of the GNU General Public License as published by the
10 : * Free Software Foundation; either version 2, or (at your option) any
11 : * later version.
12 : *
13 : * FRR is distributed in the hope that it will be useful, but
14 : * WITHOUT ANY WARRANTY; without even the implied warranty of
15 : * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 : * General Public License for more details.
17 : *
18 : * You should have received a copy of the GNU General Public License
19 : * along with FRR; see the file COPYING. If not, write to the Free
20 : * Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
21 : * 02111-1307, USA.
22 : */
23 :
24 : #include <zebra.h>
25 :
26 : #ifdef HAVE_NETLINK
27 :
28 : #include "if.h"
29 : #include "prefix.h"
30 : #include "vrf.h"
31 :
32 : #include <linux/fib_rules.h>
33 : #include "zebra/zserv.h"
34 : #include "zebra/zebra_ns.h"
35 : #include "zebra/zebra_vrf.h"
36 : #include "zebra/rt.h"
37 : #include "zebra/interface.h"
38 : #include "zebra/debug.h"
39 : #include "zebra/rtadv.h"
40 : #include "zebra/kernel_netlink.h"
41 : #include "zebra/rule_netlink.h"
42 : #include "zebra/zebra_pbr.h"
43 : #include "zebra/zebra_errors.h"
44 : #include "zebra/zebra_dplane.h"
45 : #include "zebra/zebra_trace.h"
46 :
47 : /* definitions */
48 :
49 : /* static function declarations */
50 :
51 : /* Private functions */
52 :
53 :
54 : /*
55 : * netlink_rule_msg_encode
56 : *
57 : * Encodes netlink RTM_ADDRULE/RTM_DELRULE message to buffer buf of size buflen.
58 : *
59 : * Returns -1 on failure, 0 when the msg doesn't fit entirely in the buffer
60 : * or the number of bytes written to buf.
61 : */
62 0 : static ssize_t netlink_rule_msg_encode(
63 : int cmd, const struct zebra_dplane_ctx *ctx, uint32_t filter_bm,
64 : uint32_t priority, uint32_t table, const struct prefix *src_ip,
65 : const struct prefix *dst_ip, uint32_t fwmark, uint8_t dsfield,
66 : uint8_t ip_protocol, void *buf, size_t buflen)
67 : {
68 0 : uint8_t protocol = RTPROT_ZEBRA;
69 0 : int family;
70 0 : int bytelen;
71 0 : struct {
72 : struct nlmsghdr n;
73 : struct fib_rule_hdr frh;
74 : char buf[];
75 0 : } *req = buf;
76 :
77 0 : const char *ifname = dplane_ctx_rule_get_ifname(ctx);
78 :
79 0 : if (buflen < sizeof(*req))
80 : return 0;
81 0 : memset(req, 0, sizeof(*req));
82 :
83 : /* Assume ipv4 if no src/dst set, we only support ipv4/ipv6 */
84 0 : if (PREFIX_FAMILY(src_ip))
85 0 : family = PREFIX_FAMILY(src_ip);
86 0 : else if (PREFIX_FAMILY(dst_ip))
87 0 : family = PREFIX_FAMILY(dst_ip);
88 : else
89 : family = AF_INET;
90 :
91 0 : bytelen = (family == AF_INET ? 4 : 16);
92 :
93 0 : req->n.nlmsg_type = cmd;
94 0 : req->n.nlmsg_len = NLMSG_LENGTH(sizeof(struct rtmsg));
95 0 : req->n.nlmsg_flags = NLM_F_REQUEST;
96 :
97 0 : req->frh.family = family;
98 0 : req->frh.action = FR_ACT_TO_TBL;
99 :
100 0 : if (!nl_attr_put(&req->n, buflen, FRA_PROTOCOL, &protocol,
101 : sizeof(protocol)))
102 : return 0;
103 :
104 : /* rule's pref # */
105 0 : if (!nl_attr_put32(&req->n, buflen, FRA_PRIORITY, priority))
106 : return 0;
107 :
108 : /* interface on which applied */
109 0 : if (!nl_attr_put(&req->n, buflen, FRA_IFNAME, ifname,
110 0 : strlen(ifname) + 1))
111 : return 0;
112 :
113 : /* source IP, if specified */
114 0 : if (filter_bm & PBR_FILTER_SRC_IP) {
115 0 : req->frh.src_len = src_ip->prefixlen;
116 0 : if (!nl_attr_put(&req->n, buflen, FRA_SRC, &src_ip->u.prefix,
117 : bytelen))
118 : return 0;
119 : }
120 :
121 : /* destination IP, if specified */
122 0 : if (filter_bm & PBR_FILTER_DST_IP) {
123 0 : req->frh.dst_len = dst_ip->prefixlen;
124 0 : if (!nl_attr_put(&req->n, buflen, FRA_DST, &dst_ip->u.prefix,
125 : bytelen))
126 : return 0;
127 : }
128 :
129 : /* fwmark, if specified */
130 0 : if (filter_bm & PBR_FILTER_FWMARK) {
131 0 : if (!nl_attr_put32(&req->n, buflen, FRA_FWMARK, fwmark))
132 : return 0;
133 : }
134 :
135 : /* dsfield, if specified */
136 0 : if (filter_bm & PBR_FILTER_DSFIELD)
137 0 : req->frh.tos = dsfield;
138 :
139 : /* protocol to match on */
140 0 : if (filter_bm & PBR_FILTER_IP_PROTOCOL)
141 0 : nl_attr_put8(&req->n, buflen, FRA_IP_PROTO, ip_protocol);
142 :
143 : /* Route table to use to forward, if filter criteria matches. */
144 0 : if (table < 256)
145 0 : req->frh.table = table;
146 : else {
147 0 : req->frh.table = RT_TABLE_UNSPEC;
148 0 : if (!nl_attr_put32(&req->n, buflen, FRA_TABLE, table))
149 : return 0;
150 : }
151 :
152 0 : if (IS_ZEBRA_DEBUG_KERNEL)
153 0 : zlog_debug(
154 : "Tx %s family %s IF %s Pref %u Fwmark %u Src %pFX Dst %pFX Table %u",
155 : nl_msg_type_to_str(cmd), nl_family_to_str(family),
156 : ifname, priority, fwmark, src_ip, dst_ip, table);
157 :
158 0 : return NLMSG_ALIGN(req->n.nlmsg_len);
159 : }
160 :
161 0 : static ssize_t netlink_rule_msg_encoder(struct zebra_dplane_ctx *ctx, void *buf,
162 : size_t buflen)
163 : {
164 0 : int cmd = RTM_NEWRULE;
165 :
166 0 : if (dplane_ctx_get_op(ctx) == DPLANE_OP_RULE_DELETE)
167 0 : cmd = RTM_DELRULE;
168 :
169 0 : return netlink_rule_msg_encode(
170 : cmd, ctx, dplane_ctx_rule_get_filter_bm(ctx),
171 : dplane_ctx_rule_get_priority(ctx),
172 : dplane_ctx_rule_get_table(ctx), dplane_ctx_rule_get_src_ip(ctx),
173 : dplane_ctx_rule_get_dst_ip(ctx),
174 : dplane_ctx_rule_get_fwmark(ctx),
175 0 : dplane_ctx_rule_get_dsfield(ctx),
176 0 : dplane_ctx_rule_get_ipproto(ctx), buf, buflen);
177 : }
178 :
179 0 : static ssize_t netlink_oldrule_msg_encoder(struct zebra_dplane_ctx *ctx,
180 : void *buf, size_t buflen)
181 : {
182 0 : return netlink_rule_msg_encode(
183 : RTM_DELRULE, ctx, dplane_ctx_rule_get_old_filter_bm(ctx),
184 : dplane_ctx_rule_get_old_priority(ctx),
185 : dplane_ctx_rule_get_old_table(ctx),
186 : dplane_ctx_rule_get_old_src_ip(ctx),
187 : dplane_ctx_rule_get_old_dst_ip(ctx),
188 : dplane_ctx_rule_get_old_fwmark(ctx),
189 0 : dplane_ctx_rule_get_old_dsfield(ctx),
190 0 : dplane_ctx_rule_get_old_ipproto(ctx), buf, buflen);
191 : }
192 :
193 : /* Public functions */
194 :
195 : enum netlink_msg_status
196 0 : netlink_put_rule_update_msg(struct nl_batch *bth, struct zebra_dplane_ctx *ctx)
197 : {
198 0 : enum dplane_op_e op;
199 0 : enum netlink_msg_status ret;
200 :
201 0 : op = dplane_ctx_get_op(ctx);
202 0 : if (!(op == DPLANE_OP_RULE_ADD || op == DPLANE_OP_RULE_UPDATE
203 : || op == DPLANE_OP_RULE_DELETE)) {
204 0 : flog_err(
205 : EC_ZEBRA_PBR_RULE_UPDATE,
206 : "Context received for kernel rule update with incorrect OP code (%u)",
207 : op);
208 0 : return FRR_NETLINK_ERROR;
209 : }
210 :
211 0 : ret = netlink_batch_add_msg(bth, ctx, netlink_rule_msg_encoder, false);
212 :
213 : /**
214 : * Delete the old one.
215 : *
216 : * Don't care about this result right?
217 : */
218 0 : if (op == DPLANE_OP_RULE_UPDATE)
219 0 : netlink_batch_add_msg(bth, ctx, netlink_oldrule_msg_encoder,
220 : true);
221 :
222 : return ret;
223 : }
224 :
225 : /*
226 : * Handle netlink notification informing a rule add or delete.
227 : * Handling of an ADD is TBD.
228 : * DELs are notified up, if other attributes indicate it may be a
229 : * notification of interest. The expectation is that if this corresponds
230 : * to a PBR rule added by FRR, it will be readded.
231 : *
232 : * If startup and we see a rule we created, delete it as its leftover
233 : * from a previous instance and should have been removed on shutdown.
234 : *
235 : */
236 20 : int netlink_rule_change(struct nlmsghdr *h, ns_id_t ns_id, int startup)
237 : {
238 20 : struct zebra_ns *zns;
239 20 : struct fib_rule_hdr *frh;
240 20 : struct rtattr *tb[FRA_MAX + 1];
241 20 : int len;
242 20 : char *ifname;
243 20 : struct zebra_pbr_rule rule = {};
244 20 : uint8_t proto = 0;
245 20 : uint8_t ip_proto = 0;
246 :
247 20 : frrtrace(3, frr_zebra, netlink_rule_change, h, ns_id, startup);
248 :
249 : /* Basic validation followed by extracting attributes. */
250 20 : if (h->nlmsg_type != RTM_NEWRULE && h->nlmsg_type != RTM_DELRULE)
251 : return 0;
252 :
253 20 : len = h->nlmsg_len - NLMSG_LENGTH(sizeof(struct fib_rule_hdr));
254 20 : if (len < 0) {
255 0 : zlog_err(
256 : "%s: Message received from netlink is of a broken size: %d %zu",
257 : __func__, h->nlmsg_len,
258 : (size_t)NLMSG_LENGTH(sizeof(struct fib_rule_hdr)));
259 0 : return -1;
260 : }
261 :
262 20 : frh = NLMSG_DATA(h);
263 :
264 20 : if (frh->family != AF_INET && frh->family != AF_INET6) {
265 0 : if (frh->family == RTNL_FAMILY_IPMR
266 0 : || frh->family == RTNL_FAMILY_IP6MR) {
267 0 : if (IS_ZEBRA_DEBUG_KERNEL)
268 0 : zlog_debug(
269 : "Received rule netlink that we are ignoring for family %u, rule change: %u",
270 : frh->family, h->nlmsg_type);
271 0 : return 0;
272 : }
273 0 : flog_warn(
274 : EC_ZEBRA_NETLINK_INVALID_AF,
275 : "Invalid address family: %u received from kernel rule change: %u",
276 : frh->family, h->nlmsg_type);
277 0 : return 0;
278 : }
279 20 : if (frh->action != FR_ACT_TO_TBL)
280 : return 0;
281 :
282 20 : memset(tb, 0, sizeof(tb));
283 20 : netlink_parse_rtattr(tb, FRA_MAX, RTM_RTA(frh), len);
284 :
285 20 : if (tb[FRA_PRIORITY])
286 12 : rule.rule.priority = *(uint32_t *)RTA_DATA(tb[FRA_PRIORITY]);
287 :
288 20 : if (tb[FRA_SRC]) {
289 0 : if (frh->family == AF_INET)
290 0 : memcpy(&rule.rule.filter.src_ip.u.prefix4,
291 : RTA_DATA(tb[FRA_SRC]), 4);
292 : else
293 0 : memcpy(&rule.rule.filter.src_ip.u.prefix6,
294 : RTA_DATA(tb[FRA_SRC]), 16);
295 0 : rule.rule.filter.src_ip.prefixlen = frh->src_len;
296 0 : rule.rule.filter.src_ip.family = frh->family;
297 0 : rule.rule.filter.filter_bm |= PBR_FILTER_SRC_IP;
298 : }
299 :
300 20 : if (tb[FRA_DST]) {
301 0 : if (frh->family == AF_INET)
302 0 : memcpy(&rule.rule.filter.dst_ip.u.prefix4,
303 : RTA_DATA(tb[FRA_DST]), 4);
304 : else
305 0 : memcpy(&rule.rule.filter.dst_ip.u.prefix6,
306 : RTA_DATA(tb[FRA_DST]), 16);
307 0 : rule.rule.filter.dst_ip.prefixlen = frh->dst_len;
308 0 : rule.rule.filter.dst_ip.family = frh->family;
309 0 : rule.rule.filter.filter_bm |= PBR_FILTER_DST_IP;
310 : }
311 :
312 20 : if (tb[FRA_TABLE])
313 20 : rule.rule.action.table = *(uint32_t *)RTA_DATA(tb[FRA_TABLE]);
314 : else
315 0 : rule.rule.action.table = frh->table;
316 :
317 : /* TBD: We don't care about rules not specifying an IIF. */
318 20 : if (tb[FRA_IFNAME] == NULL)
319 : return 0;
320 :
321 0 : if (tb[FRA_PROTOCOL])
322 0 : proto = *(uint8_t *)RTA_DATA(tb[FRA_PROTOCOL]);
323 :
324 0 : if (tb[FRA_IP_PROTO])
325 0 : ip_proto = *(uint8_t *)RTA_DATA(tb[FRA_IP_PROTO]);
326 :
327 0 : ifname = (char *)RTA_DATA(tb[FRA_IFNAME]);
328 0 : strlcpy(rule.ifname, ifname, sizeof(rule.ifname));
329 :
330 0 : if (h->nlmsg_type == RTM_NEWRULE) {
331 : /*
332 : * If we see a rule at startup we created, delete it now.
333 : * It should have been flushed on a previous shutdown.
334 : */
335 0 : if (startup && proto == RTPROT_ZEBRA) {
336 0 : enum zebra_dplane_result ret;
337 :
338 0 : ret = dplane_pbr_rule_delete(&rule);
339 :
340 0 : zlog_debug(
341 : "%s: %s leftover rule: family %s IF %s Pref %u Src %pFX Dst %pFX Table %u ip-proto: %u",
342 : __func__,
343 : ((ret == ZEBRA_DPLANE_REQUEST_FAILURE)
344 : ? "Failed to remove"
345 : : "Removed"),
346 : nl_family_to_str(frh->family), rule.ifname,
347 : rule.rule.priority, &rule.rule.filter.src_ip,
348 : &rule.rule.filter.dst_ip,
349 : rule.rule.action.table, ip_proto);
350 : }
351 :
352 : /* TBD */
353 0 : return 0;
354 : }
355 :
356 0 : zns = zebra_ns_lookup(ns_id);
357 :
358 : /* If we don't know the interface, we don't care. */
359 0 : if (!if_lookup_by_name_per_ns(zns, ifname))
360 : return 0;
361 :
362 0 : if (IS_ZEBRA_DEBUG_KERNEL)
363 0 : zlog_debug(
364 : "Rx %s family %s IF %s Pref %u Src %pFX Dst %pFX Table %u ip-proto: %u",
365 : nl_msg_type_to_str(h->nlmsg_type),
366 : nl_family_to_str(frh->family), rule.ifname,
367 : rule.rule.priority, &rule.rule.filter.src_ip,
368 : &rule.rule.filter.dst_ip, rule.rule.action.table,
369 : ip_proto);
370 :
371 0 : return kernel_pbr_rule_del(&rule);
372 : }
373 :
374 : /*
375 : * Request rules from the kernel
376 : */
377 8 : static int netlink_request_rules(struct zebra_ns *zns, int family, int type)
378 : {
379 8 : struct {
380 : struct nlmsghdr n;
381 : struct fib_rule_hdr frh;
382 : char buf[NL_PKT_BUF_SIZE];
383 : } req;
384 :
385 8 : memset(&req, 0, sizeof(req));
386 8 : req.n.nlmsg_type = type;
387 8 : req.n.nlmsg_flags = NLM_F_ROOT | NLM_F_MATCH | NLM_F_REQUEST;
388 8 : req.n.nlmsg_len = NLMSG_LENGTH(sizeof(struct fib_rule_hdr));
389 8 : req.frh.family = family;
390 :
391 8 : return netlink_request(&zns->netlink_cmd, &req);
392 : }
393 :
394 : /*
395 : * Get to know existing PBR rules in the kernel - typically called at startup.
396 : */
397 4 : int netlink_rules_read(struct zebra_ns *zns)
398 : {
399 4 : int ret;
400 4 : struct zebra_dplane_info dp_info;
401 :
402 4 : zebra_dplane_info_from_zns(&dp_info, zns, true);
403 :
404 4 : ret = netlink_request_rules(zns, AF_INET, RTM_GETRULE);
405 4 : if (ret < 0)
406 : return ret;
407 :
408 4 : ret = netlink_parse_info(netlink_rule_change, &zns->netlink_cmd,
409 : &dp_info, 0, true);
410 4 : if (ret < 0)
411 : return ret;
412 :
413 4 : ret = netlink_request_rules(zns, AF_INET6, RTM_GETRULE);
414 4 : if (ret < 0)
415 : return ret;
416 :
417 4 : ret = netlink_parse_info(netlink_rule_change, &zns->netlink_cmd,
418 : &dp_info, 0, true);
419 4 : return ret;
420 : }
421 :
422 : #endif /* HAVE_NETLINK */
|