Line data Source code
1 : // SPDX-License-Identifier: GPL-2.0-or-later
2 : /* Thread management routine
3 : * Copyright (C) 1998, 2000 Kunihiro Ishiguro <kunihiro@zebra.org>
4 : */
5 :
6 : /* #define DEBUG */
7 :
8 : #include <zebra.h>
9 : #include <sys/resource.h>
10 :
11 : #include "frrevent.h"
12 : #include "memory.h"
13 : #include "frrcu.h"
14 : #include "log.h"
15 : #include "hash.h"
16 : #include "command.h"
17 : #include "sigevent.h"
18 : #include "network.h"
19 : #include "jhash.h"
20 : #include "frratomic.h"
21 : #include "frr_pthread.h"
22 : #include "lib_errors.h"
23 : #include "libfrr_trace.h"
24 : #include "libfrr.h"
25 :
26 12 : DEFINE_MTYPE_STATIC(LIB, THREAD, "Thread");
27 12 : DEFINE_MTYPE_STATIC(LIB, EVENT_MASTER, "Thread master");
28 12 : DEFINE_MTYPE_STATIC(LIB, EVENT_POLL, "Thread Poll Info");
29 12 : DEFINE_MTYPE_STATIC(LIB, EVENT_STATS, "Thread stats");
30 :
31 15363 : DECLARE_LIST(event_list, struct event, eventitem);
32 :
33 : struct cancel_req {
34 : int flags;
35 : struct event *thread;
36 : void *eventobj;
37 : struct event **threadref;
38 : };
39 :
40 : /* Flags for task cancellation */
41 : #define EVENT_CANCEL_FLAG_READY 0x01
42 :
43 947 : static int event_timer_cmp(const struct event *a, const struct event *b)
44 : {
45 947 : if (a->u.sands.tv_sec < b->u.sands.tv_sec)
46 : return -1;
47 612 : if (a->u.sands.tv_sec > b->u.sands.tv_sec)
48 : return 1;
49 193 : if (a->u.sands.tv_usec < b->u.sands.tv_usec)
50 : return -1;
51 42 : if (a->u.sands.tv_usec > b->u.sands.tv_usec)
52 : return 1;
53 : return 0;
54 : }
55 :
56 1472 : DECLARE_HEAP(event_timer_list, struct event, timeritem, event_timer_cmp);
57 :
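/* Wake up an event loop that may be blocked in poll(): fd_poll() always adds
 * m->io_pipe[0] to the pollfd set, so writing one byte to io_pipe[1] forces
 * poll() to return and the loop to re-evaluate its timers, ready queue and
 * pending cancellation requests.
 */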
58 : #define AWAKEN(m) \
59 : do { \
60 : const unsigned char wakebyte = 0x01; \
61 : write(m->io_pipe[1], &wakebyte, 1); \
62 : } while (0)
63 :
64 : /* control variable for initializer */
65 : static pthread_once_t init_once = PTHREAD_ONCE_INIT;
66 : pthread_key_t thread_current;
67 :
68 : static pthread_mutex_t masters_mtx = PTHREAD_MUTEX_INITIALIZER;
69 : static struct list *masters;
70 :
71 : static void thread_free(struct event_loop *master, struct event *thread);
72 :
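/* Runtime knobs behind the "service cputime-stats", "service cputime-warning"
 * and "service walltime-warning" commands in the CLI section below; the
 * thresholds are stored in microseconds, while the CLI takes milliseconds.
 */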
73 : bool cputime_enabled = true;
74 : unsigned long cputime_threshold = CONSUMED_TIME_CHECK;
75 : unsigned long walltime_threshold = CONSUMED_TIME_CHECK;
76 :
77 : /* CLI start ---------------------------------------------------------------- */
78 : #include "lib/event_clippy.c"
79 :
80 823 : static uint32_t cpu_record_hash_key(const struct cpu_event_history *a)
81 : {
82 823 : int size = sizeof(a->func);
83 :
84 823 : return jhash(&a->func, size, 0);
85 : }
86 :
87 595 : static int cpu_record_hash_cmp(const struct cpu_event_history *a,
88 : const struct cpu_event_history *b)
89 : {
90 595 : return numcmp((uintptr_t)a->func, (uintptr_t)b->func);
91 : }
92 :
93 3143 : DECLARE_HASH(cpu_records, struct cpu_event_history, item, cpu_record_hash_cmp,
94 : cpu_record_hash_key);
95 :
96 717 : static struct cpu_event_history *cpu_records_get(struct event_loop *loop,
97 : void (*func)(struct event *e),
98 : const char *funcname)
99 : {
100 717 : struct cpu_event_history ref = { .func = func }, *res;
101 :
102 717 : res = cpu_records_find(loop->cpu_records, &ref);
103 717 : if (!res) {
104 122 : res = XCALLOC(MTYPE_EVENT_STATS, sizeof(*res));
105 122 : res->func = func;
106 122 : res->funcname = funcname;
107 122 : cpu_records_add(loop->cpu_records, res);
108 : }
109 717 : return res;
110 : }
111 :
112 122 : static void cpu_records_free(struct cpu_event_history **p)
113 : {
114 122 : XFREE(MTYPE_EVENT_STATS, *p);
115 122 : }
116 :
117 0 : static void vty_out_cpu_event_history(struct vty *vty,
118 : struct cpu_event_history *a)
119 : {
120 0 : vty_out(vty,
121 : "%5zu %10zu.%03zu %9zu %8zu %9zu %8zu %9zu %9zu %9zu %10zu",
122 0 : a->total_active, a->cpu.total / 1000, a->cpu.total % 1000,
123 0 : a->total_calls, (a->cpu.total / a->total_calls), a->cpu.max,
124 0 : (a->real.total / a->total_calls), a->real.max,
125 0 : a->total_cpu_warn, a->total_wall_warn, a->total_starv_warn);
126 0 : vty_out(vty, " %c%c%c%c%c %s\n",
127 0 : a->types & (1 << EVENT_READ) ? 'R' : ' ',
128 0 : a->types & (1 << EVENT_WRITE) ? 'W' : ' ',
129 0 : a->types & (1 << EVENT_TIMER) ? 'T' : ' ',
130 0 : a->types & (1 << EVENT_EVENT) ? 'E' : ' ',
131 0 : a->types & (1 << EVENT_EXECUTE) ? 'X' : ' ', a->funcname);
132 0 : }
133 :
134 0 : static void cpu_record_print_one(struct vty *vty, uint8_t filter,
135 : struct cpu_event_history *totals,
136 : const struct cpu_event_history *a)
137 : {
138 0 : struct cpu_event_history copy;
139 :
140 0 : copy.total_active =
141 0 : atomic_load_explicit(&a->total_active, memory_order_seq_cst);
142 0 : copy.total_calls =
143 0 : atomic_load_explicit(&a->total_calls, memory_order_seq_cst);
144 0 : copy.total_cpu_warn =
145 0 : atomic_load_explicit(&a->total_cpu_warn, memory_order_seq_cst);
146 0 : copy.total_wall_warn =
147 0 : atomic_load_explicit(&a->total_wall_warn, memory_order_seq_cst);
148 0 : copy.total_starv_warn = atomic_load_explicit(&a->total_starv_warn,
149 : memory_order_seq_cst);
150 0 : copy.cpu.total =
151 0 : atomic_load_explicit(&a->cpu.total, memory_order_seq_cst);
152 0 : copy.cpu.max = atomic_load_explicit(&a->cpu.max, memory_order_seq_cst);
153 0 : copy.real.total =
154 0 : atomic_load_explicit(&a->real.total, memory_order_seq_cst);
155 0 : copy.real.max =
156 0 : atomic_load_explicit(&a->real.max, memory_order_seq_cst);
157 0 : copy.types = atomic_load_explicit(&a->types, memory_order_seq_cst);
158 0 : copy.funcname = a->funcname;
159 :
160 0 : if (!(copy.types & filter))
161 0 : return;
162 :
163 0 : vty_out_cpu_event_history(vty, &copy);
164 0 : totals->total_active += copy.total_active;
165 0 : totals->total_calls += copy.total_calls;
166 0 : totals->total_cpu_warn += copy.total_cpu_warn;
167 0 : totals->total_wall_warn += copy.total_wall_warn;
168 0 : totals->total_starv_warn += copy.total_starv_warn;
169 0 : totals->real.total += copy.real.total;
170 0 : if (totals->real.max < copy.real.max)
171 0 : totals->real.max = copy.real.max;
172 0 : totals->cpu.total += copy.cpu.total;
173 0 : if (totals->cpu.max < copy.cpu.max)
174 0 : totals->cpu.max = copy.cpu.max;
175 : }
176 :
177 0 : static void cpu_record_print(struct vty *vty, uint8_t filter)
178 : {
179 0 : struct cpu_event_history tmp;
180 0 : struct event_loop *m;
181 0 : struct listnode *ln;
182 :
183 0 : if (!cputime_enabled)
184 0 : vty_out(vty,
185 : "\n"
186 : "Collecting CPU time statistics is currently disabled. Following statistics\n"
187 : "will be zero or may display data from when collection was enabled. Use the\n"
188 : " \"service cputime-stats\" command to start collecting data.\n"
189 : "\nCounters and wallclock times are always maintained and should be accurate.\n");
190 :
191 0 : memset(&tmp, 0, sizeof(tmp));
192 0 : tmp.funcname = "TOTAL";
193 0 : tmp.types = filter;
194 :
195 0 : frr_with_mutex (&masters_mtx) {
196 0 : for (ALL_LIST_ELEMENTS_RO(masters, ln, m)) {
197 0 : const char *name = m->name ? m->name : "main";
198 0 : char underline[strlen(name) + 1];
199 :
200 0 : memset(underline, '-', sizeof(underline));
201 0 : underline[sizeof(underline) - 1] = '\0';
202 :
203 0 : vty_out(vty, "\n");
204 0 : vty_out(vty, "Showing statistics for pthread %s\n",
205 : name);
206 0 : vty_out(vty, "-------------------------------%s\n",
207 : underline);
208 0 : vty_out(vty, "%30s %18s %18s\n", "",
209 : "CPU (user+system):", "Real (wall-clock):");
210 0 : vty_out(vty,
211 : "Active Runtime(ms) Invoked Avg uSec Max uSecs");
212 0 : vty_out(vty, " Avg uSec Max uSecs");
213 0 : vty_out(vty,
214 : " CPU_Warn Wall_Warn Starv_Warn Type Thread\n");
215 :
216 0 : if (cpu_records_count(m->cpu_records)) {
217 0 : struct cpu_event_history *rec;
218 :
219 0 : frr_each (cpu_records, m->cpu_records, rec)
220 0 : cpu_record_print_one(vty, filter, &tmp,
221 : rec);
222 : } else
223 0 : vty_out(vty, "No data to display yet.\n");
224 :
225 0 : vty_out(vty, "\n");
226 : }
227 : }
228 :
229 0 : vty_out(vty, "\n");
230 0 : vty_out(vty, "Total thread statistics\n");
231 0 : vty_out(vty, "-------------------------\n");
232 0 : vty_out(vty, "%30s %18s %18s\n", "",
233 : "CPU (user+system):", "Real (wall-clock):");
234 0 : vty_out(vty, "Active Runtime(ms) Invoked Avg uSec Max uSecs");
235 0 : vty_out(vty, " Avg uSec Max uSecs CPU_Warn Wall_Warn");
236 0 : vty_out(vty, " Type Thread\n");
237 :
238 0 : if (tmp.total_calls > 0)
239 0 : vty_out_cpu_event_history(vty, &tmp);
240 0 : }
241 :
242 0 : static void cpu_record_clear(uint8_t filter)
243 : {
244 0 : struct event_loop *m;
245 0 : struct listnode *ln;
246 :
247 0 : frr_with_mutex (&masters_mtx) {
248 0 : for (ALL_LIST_ELEMENTS_RO(masters, ln, m)) {
249 0 : frr_with_mutex (&m->mtx) {
250 0 : struct cpu_event_history *item;
251 0 : struct cpu_records_head old[1];
252 :
253 0 : cpu_records_init(old);
254 0 : cpu_records_swap_all(old, m->cpu_records);
255 :
256 0 : while ((item = cpu_records_pop(old))) {
257 0 : if (item->types & filter)
258 0 : cpu_records_free(&item);
259 : else
260 0 : cpu_records_add(m->cpu_records,
261 : item);
262 : }
263 :
264 0 : cpu_records_fini(old);
265 : }
266 : }
267 : }
268 0 : }
269 :
270 0 : static uint8_t parse_filter(const char *filterstr)
271 : {
272 0 : int i = 0;
273 0 : int filter = 0;
274 :
275 0 : while (filterstr[i] != '\0') {
276 0 : switch (filterstr[i]) {
277 0 : case 'r':
278 : case 'R':
279 0 : filter |= (1 << EVENT_READ);
280 0 : break;
281 0 : case 'w':
282 : case 'W':
283 0 : filter |= (1 << EVENT_WRITE);
284 0 : break;
285 0 : case 't':
286 : case 'T':
287 0 : filter |= (1 << EVENT_TIMER);
288 0 : break;
289 0 : case 'e':
290 : case 'E':
291 0 : filter |= (1 << EVENT_EVENT);
292 0 : break;
293 0 : case 'x':
294 : case 'X':
295 0 : filter |= (1 << EVENT_EXECUTE);
296 0 : break;
297 : default:
298 : break;
299 : }
300 0 : ++i;
301 : }
302 0 : return filter;
303 : }
304 :
305 0 : DEFUN_NOSH (show_thread_cpu,
306 : show_thread_cpu_cmd,
307 : "show thread cpu [FILTER]",
308 : SHOW_STR
309 : "Thread information\n"
310 : "Thread CPU usage\n"
311 : "Display filter (rwtex)\n")
312 : {
313 0 : uint8_t filter = (uint8_t)-1U;
314 0 : int idx = 0;
315 :
316 0 : if (argv_find(argv, argc, "FILTER", &idx)) {
317 0 : filter = parse_filter(argv[idx]->arg);
318 0 : if (!filter) {
319 0 : vty_out(vty,
320 : "Invalid filter \"%s\" specified; must contain at leastone of 'RWTEXB'\n",
321 : argv[idx]->arg);
322 0 : return CMD_WARNING;
323 : }
324 : }
325 :
326 0 : cpu_record_print(vty, filter);
327 0 : return CMD_SUCCESS;
328 : }
329 :
330 0 : DEFPY (service_cputime_stats,
331 : service_cputime_stats_cmd,
332 : "[no] service cputime-stats",
333 : NO_STR
334 : "Set up miscellaneous service\n"
335 : "Collect CPU usage statistics\n")
336 : {
337 0 : cputime_enabled = !no;
338 0 : return CMD_SUCCESS;
339 : }
340 :
341 0 : DEFPY (service_cputime_warning,
342 : service_cputime_warning_cmd,
343 : "[no] service cputime-warning ![(1-4294967295)]",
344 : NO_STR
345 : "Set up miscellaneous service\n"
346 : "Warn for tasks exceeding CPU usage threshold\n"
347 : "Warning threshold in milliseconds\n")
348 : {
349 0 : if (no)
350 0 : cputime_threshold = 0;
351 : else
352 0 : cputime_threshold = cputime_warning * 1000;
353 : return CMD_SUCCESS;
354 : }
355 :
356 0 : DEFPY (service_walltime_warning,
357 : service_walltime_warning_cmd,
358 : "[no] service walltime-warning ![(1-4294967295)]",
359 : NO_STR
360 : "Set up miscellaneous service\n"
361 : "Warn for tasks exceeding total wallclock threshold\n"
362 : "Warning threshold in milliseconds\n")
363 : {
364 0 : if (no)
365 0 : walltime_threshold = 0;
366 : else
367 0 : walltime_threshold = walltime_warning * 1000;
368 : return CMD_SUCCESS;
369 : }
370 :
371 0 : static void show_thread_poll_helper(struct vty *vty, struct event_loop *m)
372 0 : {
373 0 : const char *name = m->name ? m->name : "main";
374 0 : char underline[strlen(name) + 1];
375 0 : struct event *thread;
376 0 : uint32_t i;
377 :
378 0 : memset(underline, '-', sizeof(underline));
379 0 : underline[sizeof(underline) - 1] = '\0';
380 :
381 0 : vty_out(vty, "\nShowing poll FD's for %s\n", name);
382 0 : vty_out(vty, "----------------------%s\n", underline);
383 0 : vty_out(vty, "Count: %u/%d\n", (uint32_t)m->handler.pfdcount,
384 : m->fd_limit);
385 0 : for (i = 0; i < m->handler.pfdcount; i++) {
386 0 : vty_out(vty, "\t%6d fd:%6d events:%2d revents:%2d\t\t", i,
387 0 : m->handler.pfds[i].fd, m->handler.pfds[i].events,
388 0 : m->handler.pfds[i].revents);
389 :
390 0 : if (m->handler.pfds[i].events & POLLIN) {
391 0 : thread = m->read[m->handler.pfds[i].fd];
392 :
393 0 : if (!thread)
394 0 : vty_out(vty, "ERROR ");
395 : else
396 0 : vty_out(vty, "%s ", thread->xref->funcname);
397 : } else
398 0 : vty_out(vty, " ");
399 :
400 0 : if (m->handler.pfds[i].events & POLLOUT) {
401 0 : thread = m->write[m->handler.pfds[i].fd];
402 :
403 0 : if (!thread)
404 0 : vty_out(vty, "ERROR\n");
405 : else
406 0 : vty_out(vty, "%s\n", thread->xref->funcname);
407 : } else
408 0 : vty_out(vty, "\n");
409 : }
410 0 : }
411 :
412 0 : DEFUN_NOSH (show_thread_poll,
413 : show_thread_poll_cmd,
414 : "show thread poll",
415 : SHOW_STR
416 : "Thread information\n"
417 : "Show poll FD's and information\n")
418 : {
419 0 : struct listnode *node;
420 0 : struct event_loop *m;
421 :
422 0 : frr_with_mutex (&masters_mtx) {
423 0 : for (ALL_LIST_ELEMENTS_RO(masters, node, m))
424 0 : show_thread_poll_helper(vty, m);
425 : }
426 :
427 0 : return CMD_SUCCESS;
428 : }
429 :
430 :
431 0 : DEFUN (clear_thread_cpu,
432 : clear_thread_cpu_cmd,
433 : "clear thread cpu [FILTER]",
434 : "Clear stored data in all pthreads\n"
435 : "Thread information\n"
436 : "Thread CPU usage\n"
437 : "Display filter (rwtexb)\n")
438 : {
439 0 : uint8_t filter = (uint8_t)-1U;
440 0 : int idx = 0;
441 :
442 0 : if (argv_find(argv, argc, "FILTER", &idx)) {
443 0 : filter = parse_filter(argv[idx]->arg);
444 0 : if (!filter) {
445 0 : vty_out(vty,
446 : "Invalid filter \"%s\" specified; must contain at leastone of 'RWTEXB'\n",
447 : argv[idx]->arg);
448 0 : return CMD_WARNING;
449 : }
450 : }
451 :
452 0 : cpu_record_clear(filter);
453 0 : return CMD_SUCCESS;
454 : }
455 :
456 0 : static void show_thread_timers_helper(struct vty *vty, struct event_loop *m)
457 0 : {
458 0 : const char *name = m->name ? m->name : "main";
459 0 : char underline[strlen(name) + 1];
460 0 : struct event *thread;
461 :
462 0 : memset(underline, '-', sizeof(underline));
463 0 : underline[sizeof(underline) - 1] = '\0';
464 :
465 0 : vty_out(vty, "\nShowing timers for %s\n", name);
466 0 : vty_out(vty, "-------------------%s\n", underline);
467 :
468 0 : frr_each (event_timer_list, &m->timer, thread) {
469 0 : vty_out(vty, " %-50s%pTH\n", thread->hist->funcname, thread);
470 : }
471 0 : }
472 :
473 0 : DEFPY_NOSH (show_thread_timers,
474 : show_thread_timers_cmd,
475 : "show thread timers",
476 : SHOW_STR
477 : "Thread information\n"
478 : "Show all timers and how long they have in the system\n")
479 : {
480 0 : struct listnode *node;
481 0 : struct event_loop *m;
482 :
483 0 : frr_with_mutex (&masters_mtx) {
484 0 : for (ALL_LIST_ELEMENTS_RO(masters, node, m))
485 0 : show_thread_timers_helper(vty, m);
486 : }
487 :
488 0 : return CMD_SUCCESS;
489 : }
490 :
491 4 : void event_cmd_init(void)
492 : {
493 4 : install_element(VIEW_NODE, &show_thread_cpu_cmd);
494 4 : install_element(VIEW_NODE, &show_thread_poll_cmd);
495 4 : install_element(ENABLE_NODE, &clear_thread_cpu_cmd);
496 :
497 4 : install_element(CONFIG_NODE, &service_cputime_stats_cmd);
498 4 : install_element(CONFIG_NODE, &service_cputime_warning_cmd);
499 4 : install_element(CONFIG_NODE, &service_walltime_warning_cmd);
500 :
501 4 : install_element(VIEW_NODE, &show_thread_timers_cmd);
502 4 : }
503 : /* CLI end ------------------------------------------------------------------ */
504 :
505 :
506 194 : static void cancelreq_del(void *cr)
507 : {
508 194 : XFREE(MTYPE_TMP, cr);
509 194 : }
510 :
511 : /* initializer, only ever called once */
512 4 : static void initializer(void)
513 : {
514 4 : pthread_key_create(&thread_current, NULL);
515 4 : }
516 :
517 18 : struct event_loop *event_master_create(const char *name)
518 18 : {
519 18 : struct event_loop *rv;
520 18 : struct rlimit limit;
521 :
522 18 : pthread_once(&init_once, &initializer);
523 :
524 18 : rv = XCALLOC(MTYPE_EVENT_MASTER, sizeof(struct event_loop));
525 :
526 : /* Initialize master mutex */
527 18 : pthread_mutex_init(&rv->mtx, NULL);
528 18 : pthread_cond_init(&rv->cancel_cond, NULL);
529 :
530 : /* Set name */
531 18 : name = name ? name : "default";
532 18 : rv->name = XSTRDUP(MTYPE_EVENT_MASTER, name);
533 :
534 : /* Initialize I/O task data structures */
535 :
536 : /* Use configured limit if present, ulimit otherwise. */
537 18 : rv->fd_limit = frr_get_fd_limit();
538 18 : if (rv->fd_limit == 0) {
539 18 : getrlimit(RLIMIT_NOFILE, &limit);
540 18 : rv->fd_limit = (int)limit.rlim_cur;
541 : }
542 :
543 18 : rv->read = XCALLOC(MTYPE_EVENT_POLL,
544 : sizeof(struct event *) * rv->fd_limit);
545 :
546 18 : rv->write = XCALLOC(MTYPE_EVENT_POLL,
547 : sizeof(struct event *) * rv->fd_limit);
548 :
549 18 : char tmhashname[strlen(name) + 32];
550 :
551 18 : snprintf(tmhashname, sizeof(tmhashname), "%s - threadmaster event hash",
552 : name);
553 18 : cpu_records_init(rv->cpu_records);
554 :
555 18 : event_list_init(&rv->event);
556 18 : event_list_init(&rv->ready);
557 18 : event_list_init(&rv->unuse);
558 18 : event_timer_list_init(&rv->timer);
559 :
560 : /* Initialize event_fetch() settings */
561 18 : rv->spin = true;
562 18 : rv->handle_signals = true;
563 :
564 : /* Set pthread owner, should be updated by actual owner */
565 18 : rv->owner = pthread_self();
566 18 : rv->cancel_req = list_new();
567 18 : rv->cancel_req->del = cancelreq_del;
568 18 : rv->canceled = true;
569 :
570 : /* Initialize pipe poker */
571 18 : pipe(rv->io_pipe);
572 18 : set_nonblocking(rv->io_pipe[0]);
573 18 : set_nonblocking(rv->io_pipe[1]);
574 :
575 : /* Initialize data structures for poll() */
576 18 : rv->handler.pfdsize = rv->fd_limit;
577 18 : rv->handler.pfdcount = 0;
578 18 : rv->handler.pfds = XCALLOC(MTYPE_EVENT_MASTER,
579 : sizeof(struct pollfd) * rv->handler.pfdsize);
580 18 : rv->handler.copy = XCALLOC(MTYPE_EVENT_MASTER,
581 : sizeof(struct pollfd) * rv->handler.pfdsize);
582 :
583 : /* add to list of threadmasters */
584 36 : frr_with_mutex (&masters_mtx) {
585 18 : if (!masters)
586 4 : masters = list_new();
587 :
588 18 : listnode_add(masters, rv);
589 : }
590 :
591 18 : return rv;
592 : }
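/* Illustrative usage sketch (not part of this file; assumes the
 * event_add_timer() and event_call() wrappers from frrevent.h, and a
 * hypothetical hello_cb handler).  A daemon typically creates one loop,
 * schedules work on it and then spins the fetch/call cycle until shutdown:
 *
 *	struct event_loop *loop = event_master_create("main");
 *	struct event ev;
 *
 *	event_add_timer(loop, hello_cb, NULL, 5, NULL);
 *	while (event_fetch(loop, &ev))
 *		event_call(&ev);
 *	event_master_free(loop);
 */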
593 :
594 0 : void event_master_set_name(struct event_loop *master, const char *name)
595 : {
596 0 : frr_with_mutex (&master->mtx) {
597 0 : XFREE(MTYPE_EVENT_MASTER, master->name);
598 0 : master->name = XSTRDUP(MTYPE_EVENT_MASTER, name);
599 : }
600 0 : }
601 :
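/* Maximum number of finished events kept cached on the "unuse" list for
 * recycling by thread_add_unuse(); anything beyond this depth is freed
 * immediately.
 */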
602 : #define EVENT_UNUSED_DEPTH 10
603 :
604 : /* Move thread to unuse list. */
605 685 : static void thread_add_unuse(struct event_loop *m, struct event *thread)
606 : {
607 685 : pthread_mutex_t mtxc = thread->mtx;
608 :
609 685 : assert(m != NULL && thread != NULL);
610 :
611 685 : thread->hist->total_active--;
612 685 : memset(thread, 0, sizeof(struct event));
613 685 : thread->type = EVENT_UNUSED;
614 :
615 : /* Restore the thread mutex context. */
616 685 : thread->mtx = mtxc;
617 :
618 685 : if (event_list_count(&m->unuse) < EVENT_UNUSED_DEPTH) {
619 679 : event_list_add_tail(&m->unuse, thread);
620 679 : return;
621 : }
622 :
623 6 : thread_free(m, thread);
624 : }
625 :
626 : /* Free all unused thread. */
627 54 : static void thread_list_free(struct event_loop *m, struct event_list_head *list)
628 : {
629 54 : struct event *t;
630 :
631 110 : while ((t = event_list_pop(list)))
632 56 : thread_free(m, t);
633 54 : }
634 :
635 36 : static void thread_array_free(struct event_loop *m, struct event **thread_array)
636 : {
637 36 : struct event *t;
638 36 : int index;
639 :
640 36900 : for (index = 0; index < m->fd_limit; ++index) {
641 36864 : t = thread_array[index];
642 36864 : if (t) {
643 18 : thread_array[index] = NULL;
644 18 : thread_free(m, t);
645 : }
646 : }
647 36 : XFREE(MTYPE_EVENT_POLL, thread_array);
648 36 : }
649 :
650 : /*
651 : * event_master_free_unused
652 : *
653 : * As threads are finished with, they are put on the
654 : * unuse list for later reuse.
655 : * If we are shutting down, free up the unused threads
656 : * so we can see if we forgot to shut anything off.
657 : */
658 2 : void event_master_free_unused(struct event_loop *m)
659 : {
660 4 : frr_with_mutex (&m->mtx) {
661 : struct event *t;
662 :
663 21 : while ((t = event_list_pop(&m->unuse)))
664 19 : thread_free(m, t);
665 : }
666 2 : }
667 :
668 : /* Stop thread scheduler. */
669 18 : void event_master_free(struct event_loop *m)
670 : {
671 18 : struct cpu_event_history *record;
672 18 : struct event *t;
673 :
674 36 : frr_with_mutex (&masters_mtx) {
675 18 : listnode_delete(masters, m);
676 18 : if (masters->count == 0)
677 4 : list_delete(&masters);
678 : }
679 :
680 18 : thread_array_free(m, m->read);
681 18 : thread_array_free(m, m->write);
682 24 : while ((t = event_timer_list_pop(&m->timer)))
683 6 : thread_free(m, t);
684 18 : thread_list_free(m, &m->event);
685 18 : thread_list_free(m, &m->ready);
686 18 : thread_list_free(m, &m->unuse);
687 18 : pthread_mutex_destroy(&m->mtx);
688 18 : pthread_cond_destroy(&m->cancel_cond);
689 18 : close(m->io_pipe[0]);
690 18 : close(m->io_pipe[1]);
691 18 : list_delete(&m->cancel_req);
692 18 : m->cancel_req = NULL;
693 :
694 140 : while ((record = cpu_records_pop(m->cpu_records)))
695 122 : cpu_records_free(&record);
696 18 : cpu_records_fini(m->cpu_records);
697 :
698 18 : XFREE(MTYPE_EVENT_MASTER, m->name);
699 18 : XFREE(MTYPE_EVENT_MASTER, m->handler.pfds);
700 18 : XFREE(MTYPE_EVENT_MASTER, m->handler.copy);
701 18 : XFREE(MTYPE_EVENT_MASTER, m);
702 18 : }
703 :
704 : /* Return remain time in milliseconds. */
705 6 : unsigned long event_timer_remain_msec(struct event *thread)
706 : {
707 6 : int64_t remain;
708 :
709 6 : if (!event_is_scheduled(thread))
710 : return 0;
711 :
712 6 : frr_with_mutex (&thread->mtx) {
713 6 : remain = monotime_until(&thread->u.sands, NULL) / 1000LL;
714 : }
715 :
716 6 : return remain < 0 ? 0 : remain;
717 : }
718 :
719 : /* Return remain time in seconds. */
720 6 : unsigned long event_timer_remain_second(struct event *thread)
721 : {
722 6 : return event_timer_remain_msec(thread) / 1000LL;
723 : }
724 :
725 0 : struct timeval event_timer_remain(struct event *thread)
726 : {
727 0 : struct timeval remain;
728 :
729 0 : frr_with_mutex (&thread->mtx) {
730 0 : monotime_until(&thread->u.sands, &remain);
731 : }
732 0 : return remain;
733 : }
734 :
735 0 : static int time_hhmmss(char *buf, int buf_size, long sec)
736 : {
737 0 : long hh;
738 0 : long mm;
739 0 : int wr;
740 :
741 0 : assert(buf_size >= 8);
742 :
743 0 : hh = sec / 3600;
744 0 : sec %= 3600;
745 0 : mm = sec / 60;
746 0 : sec %= 60;
747 :
748 0 : wr = snprintf(buf, buf_size, "%02ld:%02ld:%02ld", hh, mm, sec);
749 :
750 0 : return wr != 8;
751 : }
752 :
753 0 : char *event_timer_to_hhmmss(char *buf, int buf_size, struct event *t_timer)
754 : {
755 0 : if (t_timer)
756 0 : time_hhmmss(buf, buf_size, event_timer_remain_second(t_timer));
757 : else
758 0 : snprintf(buf, buf_size, "--:--:--");
759 :
760 0 : return buf;
761 : }
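/* Illustrative sketch (not part of this file): typical use from a "show"
 * command, printing the time left on a hypothetical t_hello timer.
 *
 *	char buf[16];
 *
 *	vty_out(vty, "Next hello in %s\n",
 *		event_timer_to_hhmmss(buf, sizeof(buf), t_hello));
 */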
762 :
763 : /* Get new thread. */
764 717 : static struct event *thread_get(struct event_loop *m, uint8_t type,
765 : void (*func)(struct event *), void *arg,
766 : const struct xref_eventsched *xref)
767 : {
768 717 : struct event *thread = event_list_pop(&m->unuse);
769 :
770 717 : if (!thread) {
771 105 : thread = XCALLOC(MTYPE_THREAD, sizeof(struct event));
772 : /* mutex only needs to be initialized at struct creation. */
773 105 : pthread_mutex_init(&thread->mtx, NULL);
774 105 : m->alloc++;
775 : }
776 :
777 717 : thread->type = type;
778 717 : thread->add_type = type;
779 717 : thread->master = m;
780 717 : thread->arg = arg;
781 717 : thread->yield = EVENT_YIELD_TIME_SLOT; /* default */
782 : /* thread->ref is zeroed either by XCALLOC above or by memset before
783 : * being put on the "unuse" list by thread_add_unuse().
784 : * Setting it here again makes coverity complain about a missing
785 : * lock :(
786 : */
787 : /* thread->ref = NULL; */
788 717 : thread->ignore_timer_late = false;
789 :
790 : /*
791 : * If the passed-in funcname is not the one we have
792 : * stored, then thread->hist needs to be updated.
793 : * We keep the last history around for events on the
794 : * unuse list under the assumption that we are probably
795 : * going to immediately allocate the same type of
796 : * thread.
797 : * This hopefully saves us some serious
798 : * hash lookups.
799 : */
800 717 : if ((thread->xref && thread->xref->funcname != xref->funcname)
801 717 : || thread->func != func)
802 717 : thread->hist = cpu_records_get(m, func, xref->funcname);
803 :
804 717 : thread->hist->total_active++;
805 717 : thread->func = func;
806 717 : thread->xref = xref;
807 :
808 717 : return thread;
809 : }
810 :
811 105 : static void thread_free(struct event_loop *master, struct event *thread)
812 : {
813 : /* Update statistics. */
814 105 : assert(master->alloc > 0);
815 105 : master->alloc--;
816 :
817 : /* Free allocated resources. */
818 105 : pthread_mutex_destroy(&thread->mtx);
819 105 : XFREE(MTYPE_THREAD, thread);
820 105 : }
821 :
822 6858 : static int fd_poll(struct event_loop *m, const struct timeval *timer_wait,
823 : bool *eintr_p)
824 : {
825 6858 : sigset_t origsigs;
826 6858 : unsigned char trash[64];
827 6858 : nfds_t count = m->handler.copycount;
828 :
829 : /*
830 : * If timer_wait is null here, that means poll() should block
831 : * indefinitely, unless the event_master has overridden it by setting
832 : * ->selectpoll_timeout.
833 : *
834 : * If the value is positive, it specifies the maximum number of
835 : * milliseconds to wait. If the timeout is -1, it specifies that
836 : * we should never wait and always return immediately even if no
837 : * event is detected. If the value is zero, the behavior is default.
838 : */
839 6858 : int timeout = -1;
840 :
841 : /* number of file descriptors with events */
842 6858 : int num;
843 :
844 6858 : if (timer_wait != NULL && m->selectpoll_timeout == 0) {
845 : /* use the default value */
846 6560 : timeout = (timer_wait->tv_sec * 1000)
847 6560 : + (timer_wait->tv_usec / 1000);
848 298 : } else if (m->selectpoll_timeout > 0) {
849 : /* use the user's timeout */
850 0 : timeout = m->selectpoll_timeout;
851 298 : } else if (m->selectpoll_timeout < 0) {
852 : /* effect a poll (return immediately) */
853 0 : timeout = 0;
854 : }
855 :
856 6858 : zlog_tls_buffer_flush();
857 6858 : rcu_read_unlock();
858 6858 : rcu_assert_read_unlocked();
859 :
860 : /* add poll pipe poker */
861 6858 : assert(count + 1 < m->handler.pfdsize);
862 6858 : m->handler.copy[count].fd = m->io_pipe[0];
863 6858 : m->handler.copy[count].events = POLLIN;
864 6858 : m->handler.copy[count].revents = 0x00;
865 :
866 : /* We need to deal with a signal-handling race here: we
867 : * don't want to miss a crucial signal, such as SIGTERM or SIGINT,
868 : * that may arrive just before we enter poll(). We will block the
869 : * key signals, then check whether any have arrived - if so, we return
870 : * before calling poll(). If not, we'll re-enable the signals
871 : * in the ppoll() call.
872 : */
873 :
874 6858 : sigemptyset(&origsigs);
875 6858 : if (m->handle_signals) {
876 : /* Main pthread that handles the app signals */
877 6529 : if (frr_sigevent_check(&origsigs)) {
878 : /* Signal to process - restore signal mask and return */
879 0 : pthread_sigmask(SIG_SETMASK, &origsigs, NULL);
880 0 : num = -1;
881 0 : *eintr_p = true;
882 0 : goto done;
883 : }
884 : } else {
885 : /* Don't make any changes for the non-main pthreads */
886 329 : pthread_sigmask(SIG_SETMASK, NULL, &origsigs);
887 : }
888 :
889 : #if defined(HAVE_PPOLL)
890 6858 : struct timespec ts, *tsp;
891 :
892 6858 : if (timeout >= 0) {
893 6560 : ts.tv_sec = timeout / 1000;
894 6560 : ts.tv_nsec = (timeout % 1000) * 1000000;
895 6560 : tsp = &ts;
896 : } else
897 : tsp = NULL;
898 :
899 6858 : num = ppoll(m->handler.copy, count + 1, tsp, &origsigs);
900 6858 : pthread_sigmask(SIG_SETMASK, &origsigs, NULL);
901 : #else
902 : /* Not ideal - there is a race after we restore the signal mask */
903 : pthread_sigmask(SIG_SETMASK, &origsigs, NULL);
904 : num = poll(m->handler.copy, count + 1, timeout);
905 : #endif
906 :
907 6858 : done:
908 :
909 6858 : if (num < 0 && errno == EINTR)
910 4 : *eintr_p = true;
911 :
912 6858 : if (num > 0 && m->handler.copy[count].revents != 0 && num--)
913 1034 : while (read(m->io_pipe[0], &trash, sizeof(trash)) > 0)
914 : ;
915 :
916 6858 : rcu_read_lock();
917 :
918 6858 : return num;
919 : }
920 :
921 : /* Add new read/write thread; the direction comes from xref->event_type. */
922 544 : void _event_add_read_write(const struct xref_eventsched *xref,
923 : struct event_loop *m, void (*func)(struct event *),
924 : void *arg, int fd, struct event **t_ptr)
925 : {
926 544 : int dir = xref->event_type;
927 544 : struct event *thread = NULL;
928 544 : struct event **thread_array;
929 :
930 544 : if (dir == EVENT_READ)
931 544 : frrtrace(9, frr_libfrr, schedule_read, m,
932 : xref->funcname, xref->xref.file, xref->xref.line,
933 : t_ptr, fd, 0, arg, 0);
934 : else
935 544 : frrtrace(9, frr_libfrr, schedule_write, m,
936 : xref->funcname, xref->xref.file, xref->xref.line,
937 : t_ptr, fd, 0, arg, 0);
938 :
939 544 : assert(fd >= 0);
940 544 : if (fd >= m->fd_limit)
941 0 : assert(!"Number of FD's open is greater than FRR currently configured to handle, aborting");
942 :
943 544 : frr_with_mutex (&m->mtx) {
944 : /* Thread is already scheduled; don't reschedule */
945 544 : if (t_ptr && *t_ptr)
946 : break;
947 :
948 : /* default to a new pollfd */
949 417 : nfds_t queuepos = m->handler.pfdcount;
950 :
951 417 : if (dir == EVENT_READ)
952 353 : thread_array = m->read;
953 : else
954 64 : thread_array = m->write;
955 :
956 : /*
957 : * if we already have a pollfd for our file descriptor, find and
958 : * use it
959 : */
960 1469 : for (nfds_t i = 0; i < m->handler.pfdcount; i++)
961 1403 : if (m->handler.pfds[i].fd == fd) {
962 : queuepos = i;
963 :
964 : #ifdef DEV_BUILD
965 : /*
966 : * What happens if we have a thread already
967 : * created for this event?
968 : */
969 : if (thread_array[fd])
970 : assert(!"Thread already scheduled for file descriptor");
971 : #endif
972 : break;
973 : }
974 :
975 : /* make sure we have room for this fd + pipe poker fd */
976 417 : assert(queuepos + 1 < m->handler.pfdsize);
977 :
978 417 : thread = thread_get(m, dir, func, arg, xref);
979 :
980 417 : m->handler.pfds[queuepos].fd = fd;
981 417 : m->handler.pfds[queuepos].events |=
982 : (dir == EVENT_READ ? POLLIN : POLLOUT);
983 :
984 417 : if (queuepos == m->handler.pfdcount)
985 66 : m->handler.pfdcount++;
986 :
987 417 : if (thread) {
988 417 : frr_with_mutex (&thread->mtx) {
989 417 : thread->u.fd = fd;
990 417 : thread_array[thread->u.fd] = thread;
991 : }
992 :
993 417 : if (t_ptr) {
994 397 : *t_ptr = thread;
995 397 : thread->ref = t_ptr;
996 : }
997 : }
998 :
999 417 : AWAKEN(m);
1000 : }
1001 544 : }
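/* Illustrative sketch (not part of this file; event_add_read() is the
 * frrevent.h wrapper that supplies the xref and EVENT_READ type for this
 * function, accept_cb and listen_fd are hypothetical).  Read and write
 * tasks are one-shot, so the handler re-arms itself, and the back-reference
 * in t_accept lets other code cancel the task safely:
 *
 *	static struct event *t_accept;
 *
 *	static void accept_cb(struct event *e)
 *	{
 *		int fd = e->u.fd;
 *
 *		... accept() and hand the new connection off ...
 *
 *		event_add_read(e->master, accept_cb, NULL, fd, &t_accept);
 *	}
 *
 *	event_add_read(master, accept_cb, NULL, listen_fd, &t_accept);
 */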
1002 :
1003 163 : static void _event_add_timer_timeval(const struct xref_eventsched *xref,
1004 : struct event_loop *m,
1005 : void (*func)(struct event *), void *arg,
1006 : struct timeval *time_relative,
1007 : struct event **t_ptr)
1008 : {
1009 163 : struct event *thread;
1010 163 : struct timeval t;
1011 :
1012 163 : assert(m != NULL);
1013 :
1014 163 : assert(time_relative);
1015 :
1016 163 : frrtrace(9, frr_libfrr, schedule_timer, m,
1017 : xref->funcname, xref->xref.file, xref->xref.line,
1018 : t_ptr, 0, 0, arg, (long)time_relative->tv_sec);
1019 :
1020 : /* Compute expiration/deadline time. */
1021 163 : monotime(&t);
1022 163 : timeradd(&t, time_relative, &t);
1023 :
1024 169 : frr_with_mutex (&m->mtx) {
1025 163 : if (t_ptr && *t_ptr)
1026 : /* thread is already scheduled; don't reschedule */
1027 6 : return;
1028 :
1029 157 : thread = thread_get(m, EVENT_TIMER, func, arg, xref);
1030 :
1031 314 : frr_with_mutex (&thread->mtx) {
1032 157 : thread->u.sands = t;
1033 157 : event_timer_list_add(&m->timer, thread);
1034 157 : if (t_ptr) {
1035 157 : *t_ptr = thread;
1036 157 : thread->ref = t_ptr;
1037 : }
1038 : }
1039 :
1040 : /* The timer list is sorted - if this new timer
1041 : * might change the time we'll wait for, give the pthread
1042 : * a chance to re-compute.
1043 : */
1044 314 : if (event_timer_list_first(&m->timer) == thread)
1045 236 : AWAKEN(m);
1046 : }
1047 : #define ONEYEAR2SEC (60 * 60 * 24 * 365)
1048 157 : if (time_relative->tv_sec > ONEYEAR2SEC)
1049 157 : flog_err(
1050 : EC_LIB_TIMER_TOO_LONG,
1051 : "Timer: %pTHD is created with an expiration that is greater than 1 year",
1052 : thread);
1053 : }
1054 :
1055 :
1056 : /* Add timer event thread. */
1057 108 : void _event_add_timer(const struct xref_eventsched *xref, struct event_loop *m,
1058 : void (*func)(struct event *), void *arg, long timer,
1059 : struct event **t_ptr)
1060 : {
1061 108 : struct timeval trel;
1062 :
1063 108 : assert(m != NULL);
1064 :
1065 108 : trel.tv_sec = timer;
1066 108 : trel.tv_usec = 0;
1067 :
1068 108 : _event_add_timer_timeval(xref, m, func, arg, &trel, t_ptr);
1069 108 : }
1070 :
1071 : /* Add timer event thread with "millisecond" resolution */
1072 55 : void _event_add_timer_msec(const struct xref_eventsched *xref,
1073 : struct event_loop *m, void (*func)(struct event *),
1074 : void *arg, long timer, struct event **t_ptr)
1075 : {
1076 55 : struct timeval trel;
1077 :
1078 55 : assert(m != NULL);
1079 :
1080 55 : trel.tv_sec = timer / 1000;
1081 55 : trel.tv_usec = 1000 * (timer % 1000);
1082 :
1083 55 : _event_add_timer_timeval(xref, m, func, arg, &trel, t_ptr);
1084 55 : }
1085 :
1086 : /* Add timer event thread with "timeval" resolution */
1087 0 : void _event_add_timer_tv(const struct xref_eventsched *xref,
1088 : struct event_loop *m, void (*func)(struct event *),
1089 : void *arg, struct timeval *tv, struct event **t_ptr)
1090 : {
1091 0 : _event_add_timer_timeval(xref, m, func, arg, tv, t_ptr);
1092 0 : }
1093 :
1094 : /* Add simple event thread. */
1095 291 : void _event_add_event(const struct xref_eventsched *xref, struct event_loop *m,
1096 : void (*func)(struct event *), void *arg, int val,
1097 : struct event **t_ptr)
1098 : {
1099 291 : struct event *thread = NULL;
1100 :
1101 291 : frrtrace(9, frr_libfrr, schedule_event, m,
1102 : xref->funcname, xref->xref.file, xref->xref.line,
1103 : t_ptr, 0, val, arg, 0);
1104 :
1105 291 : assert(m != NULL);
1106 :
1107 291 : frr_with_mutex (&m->mtx) {
1108 291 : if (t_ptr && *t_ptr)
1109 : /* thread is already scheduled; don't reschedule */
1110 : break;
1111 :
1112 143 : thread = thread_get(m, EVENT_EVENT, func, arg, xref);
1113 143 : frr_with_mutex (&thread->mtx) {
1114 143 : thread->u.val = val;
1115 143 : event_list_add_tail(&m->event, thread);
1116 : }
1117 :
1118 143 : if (t_ptr) {
1119 115 : *t_ptr = thread;
1120 115 : thread->ref = t_ptr;
1121 : }
1122 :
1123 143 : AWAKEN(m);
1124 : }
1125 291 : }
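/* Illustrative sketch (not part of this file; event_add_event() is the
 * frrevent.h wrapper for this function, process_backlog and t_backlog are
 * hypothetical).  This is the usual way to defer work to the pthread that
 * owns the loop:
 *
 *	static struct event *t_backlog;
 *
 *	event_add_event(master, process_backlog, NULL, 0, &t_backlog);
 *
 * Because *t_ptr is checked under m->mtx above, repeated calls made while
 * the task is still pending collapse into a single scheduled event.
 */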
1126 :
1127 : /* Thread cancellation ------------------------------------------------------ */
1128 :
1129 : /**
1130 : * Clears (bitwise-NOTs out) the .events bits of the pollfd corresponding to the
1131 : * given file descriptor. The event bits to clear are passed in the 'state' parameter.
1132 : *
1133 : * This needs to happen for both copies of pollfd's. See 'event_fetch'
1134 : * implementation for details.
1135 : *
1136 : * @param master
1137 : * @param fd
1138 : * @param state the event to cancel. One or more (OR'd together) of the
1139 : * following:
1140 : * - POLLIN
1141 : * - POLLOUT
1142 : */
1143 31 : static void event_cancel_rw(struct event_loop *master, int fd, short state,
1144 : int idx_hint)
1145 : {
1146 31 : bool found = false;
1147 :
1148 : /* find the index of corresponding pollfd */
1149 31 : nfds_t i;
1150 :
1151 : /* Cancel POLLHUP too just in case some bozo set it */
1152 31 : state |= POLLHUP;
1153 :
1154 : /* Some callers know the index of the pfd already */
1155 31 : if (idx_hint >= 0) {
1156 0 : i = idx_hint;
1157 0 : found = true;
1158 : } else {
1159 : /* Have to look for the fd in the pfd array */
1160 88 : for (i = 0; i < master->handler.pfdcount; i++)
1161 88 : if (master->handler.pfds[i].fd == fd) {
1162 : found = true;
1163 : break;
1164 : }
1165 : }
1166 :
1167 31 : if (!found) {
1168 0 : zlog_debug(
1169 : "[!] Received cancellation request for nonexistent rw job");
1170 0 : zlog_debug("[!] threadmaster: %s | fd: %d",
1171 : master->name ? master->name : "", fd);
1172 0 : return;
1173 : }
1174 :
1175 : /* NOT out event. */
1176 31 : master->handler.pfds[i].events &= ~(state);
1177 :
1178 : /* If all events are canceled, delete / resize the pollfd array. */
1179 31 : if (master->handler.pfds[i].events == 0) {
1180 31 : memmove(master->handler.pfds + i, master->handler.pfds + i + 1,
1181 31 : (master->handler.pfdcount - i - 1)
1182 : * sizeof(struct pollfd));
1183 31 : master->handler.pfdcount--;
1184 31 : master->handler.pfds[master->handler.pfdcount].fd = 0;
1185 31 : master->handler.pfds[master->handler.pfdcount].events = 0;
1186 : }
1187 :
1188 : /*
1189 : * If we have the same pollfd in the copy, perform the same operations,
1190 : * otherwise return.
1191 : */
1192 31 : if (i >= master->handler.copycount)
1193 : return;
1194 :
1195 31 : master->handler.copy[i].events &= ~(state);
1196 :
1197 31 : if (master->handler.copy[i].events == 0) {
1198 29 : memmove(master->handler.copy + i, master->handler.copy + i + 1,
1199 29 : (master->handler.copycount - i - 1)
1200 : * sizeof(struct pollfd));
1201 29 : master->handler.copycount--;
1202 29 : master->handler.copy[master->handler.copycount].fd = 0;
1203 29 : master->handler.copy[master->handler.copycount].events = 0;
1204 : }
1205 : }
1206 :
1207 : /*
1208 : * Process task cancellation given a task argument: iterate through the
1209 : * various lists of tasks, looking for any that match the argument.
1210 : */
1211 40 : static void cancel_arg_helper(struct event_loop *master,
1212 : const struct cancel_req *cr)
1213 : {
1214 40 : struct event *t;
1215 40 : nfds_t i;
1216 40 : int fd;
1217 40 : struct pollfd *pfd;
1218 :
1219 : /* We're only processing arg-based cancellations here. */
1220 40 : if (cr->eventobj == NULL)
1221 : return;
1222 :
1223 : /* First process the ready lists. */
1224 56 : frr_each_safe (event_list, &master->event, t) {
1225 8 : if (t->arg != cr->eventobj)
1226 8 : continue;
1227 0 : event_list_del(&master->event, t);
1228 0 : if (t->ref)
1229 0 : *t->ref = NULL;
1230 0 : thread_add_unuse(master, t);
1231 : }
1232 :
1233 96 : frr_each_safe (event_list, &master->ready, t) {
1234 25 : if (t->arg != cr->eventobj)
1235 25 : continue;
1236 0 : event_list_del(&master->ready, t);
1237 0 : if (t->ref)
1238 0 : *t->ref = NULL;
1239 0 : thread_add_unuse(master, t);
1240 : }
1241 :
1242 : /* If requested, stop here and ignore io and timers */
1243 40 : if (CHECK_FLAG(cr->flags, EVENT_CANCEL_FLAG_READY))
1244 : return;
1245 :
1246 : /* Check the io tasks */
1247 39 : for (i = 0; i < master->handler.pfdcount;) {
1248 33 : pfd = master->handler.pfds + i;
1249 :
1250 33 : if (pfd->events & POLLIN)
1251 33 : t = master->read[pfd->fd];
1252 : else
1253 0 : t = master->write[pfd->fd];
1254 :
1255 33 : if (t && t->arg == cr->eventobj) {
1256 0 : fd = pfd->fd;
1257 :
1258 : /* Found a match to cancel: clean up fd arrays */
1259 0 : event_cancel_rw(master, pfd->fd, pfd->events, i);
1260 :
1261 : /* Clean up thread arrays */
1262 0 : master->read[fd] = NULL;
1263 0 : master->write[fd] = NULL;
1264 :
1265 : /* Clear caller's ref */
1266 0 : if (t->ref)
1267 0 : *t->ref = NULL;
1268 :
1269 0 : thread_add_unuse(master, t);
1270 :
1271 : /* Don't increment 'i' since the cancellation will have
1272 : * removed the entry from the pfd array
1273 : */
1274 : } else
1275 33 : i++;
1276 : }
1277 :
1278 : /* Check the timer tasks */
1279 6 : t = event_timer_list_first(&master->timer);
1280 42 : while (t) {
1281 36 : struct event *t_next;
1282 :
1283 36 : t_next = event_timer_list_next(&master->timer, t);
1284 :
1285 36 : if (t->arg == cr->eventobj) {
1286 0 : event_timer_list_del(&master->timer, t);
1287 0 : if (t->ref)
1288 0 : *t->ref = NULL;
1289 0 : thread_add_unuse(master, t);
1290 : }
1291 :
1292 : t = t_next;
1293 : }
1294 : }
1295 :
1296 : /**
1297 : * Process cancellation requests.
1298 : *
1299 : * This may only be run from the pthread which owns the event_master.
1300 : *
1301 : * @param master the thread master to process
1302 : * @REQUIRE master->mtx
1303 : */
1304 7579 : static void do_event_cancel(struct event_loop *master)
1305 : {
1306 7579 : struct event_list_head *list = NULL;
1307 7579 : struct event **thread_array = NULL;
1308 7579 : struct event *thread;
1309 7579 : struct cancel_req *cr;
1310 7579 : struct listnode *ln;
1311 :
1312 15352 : for (ALL_LIST_ELEMENTS_RO(master->cancel_req, ln, cr)) {
1313 : /*
1314 : * If this is an event object cancellation, search
1315 : * through task lists deleting any tasks which have the
1316 : * specified argument - use this handy helper function.
1317 : */
1318 194 : if (cr->eventobj) {
1319 40 : cancel_arg_helper(master, cr);
1320 40 : continue;
1321 : }
1322 :
1323 : /*
1324 : * The pointer varies depending on whether the cancellation
1325 : * request was made asynchronously or not. If it was, we
1326 : * need to check whether the thread even exists anymore
1327 : * before cancelling it.
1328 : */
1329 154 : thread = (cr->thread) ? cr->thread : *cr->threadref;
1330 :
1331 154 : if (!thread)
1332 64 : continue;
1333 :
1334 90 : list = NULL;
1335 90 : thread_array = NULL;
1336 :
1337 : /* Determine the appropriate queue to cancel the thread from */
1338 90 : switch (thread->type) {
1339 31 : case EVENT_READ:
1340 31 : event_cancel_rw(master, thread->u.fd, POLLIN, -1);
1341 31 : thread_array = master->read;
1342 31 : break;
1343 0 : case EVENT_WRITE:
1344 0 : event_cancel_rw(master, thread->u.fd, POLLOUT, -1);
1345 0 : thread_array = master->write;
1346 0 : break;
1347 58 : case EVENT_TIMER:
1348 58 : event_timer_list_del(&master->timer, thread);
1349 58 : break;
1350 0 : case EVENT_EVENT:
1351 0 : list = &master->event;
1352 0 : break;
1353 1 : case EVENT_READY:
1354 1 : list = &master->ready;
1355 1 : break;
1356 0 : case EVENT_UNUSED:
1357 : case EVENT_EXECUTE:
1358 0 : continue;
1359 : break;
1360 : }
1361 :
1362 90 : if (list)
1363 1 : event_list_del(list, thread);
1364 89 : else if (thread_array)
1365 31 : thread_array[thread->u.fd] = NULL;
1366 :
1367 90 : if (thread->ref)
1368 90 : *thread->ref = NULL;
1369 :
1370 90 : thread_add_unuse(thread->master, thread);
1371 : }
1372 :
1373 : /* Delete and free all cancellation requests */
1374 7579 : if (master->cancel_req)
1375 7579 : list_delete_all_node(master->cancel_req);
1376 :
1377 : /* Wake up any threads which may be blocked in event_cancel_async() */
1378 7579 : master->canceled = true;
1379 7579 : pthread_cond_broadcast(&master->cancel_cond);
1380 7579 : }
1381 :
1382 : /*
1383 : * Helper function used for multiple flavors of arg-based cancellation.
1384 : */
1385 40 : static void cancel_event_helper(struct event_loop *m, void *arg, int flags)
1386 : {
1387 40 : struct cancel_req *cr;
1388 :
1389 40 : assert(m->owner == pthread_self());
1390 :
1391 : /* Only worth anything if caller supplies an arg. */
1392 40 : if (arg == NULL)
1393 : return;
1394 :
1395 40 : cr = XCALLOC(MTYPE_TMP, sizeof(struct cancel_req));
1396 :
1397 40 : cr->flags = flags;
1398 :
1399 80 : frr_with_mutex (&m->mtx) {
1400 40 : cr->eventobj = arg;
1401 40 : listnode_add(m->cancel_req, cr);
1402 40 : do_event_cancel(m);
1403 : }
1404 : }
1405 :
1406 : /**
1407 : * Cancel any events which have the specified argument.
1408 : *
1409 : * MT-Unsafe
1410 : *
1411 : * @param m the event_master to cancel from
1412 : * @param arg the argument passed when creating the event
1413 : */
1414 6 : void event_cancel_event(struct event_loop *master, void *arg)
1415 : {
1416 6 : cancel_event_helper(master, arg, 0);
1417 6 : }
1418 :
1419 : /*
1420 : * Cancel ready tasks with an arg matching 'arg'
1421 : *
1422 : * MT-Unsafe
1423 : *
1424 : * @param m the event_master to cancel from
1425 : * @param arg the argument passed when creating the event
1426 : */
1427 34 : void event_cancel_event_ready(struct event_loop *m, void *arg)
1428 : {
1429 :
1430 : /* Only cancel ready/event tasks */
1431 34 : cancel_event_helper(m, arg, EVENT_CANCEL_FLAG_READY);
1432 34 : }
1433 :
1434 : /**
1435 : * Cancel a specific task.
1436 : *
1437 : * MT-Unsafe
1438 : *
1439 : * @param thread task to cancel
1440 : */
1441 86 : void event_cancel(struct event **thread)
1442 : {
1443 86 : struct event_loop *master;
1444 :
1445 86 : if (thread == NULL || *thread == NULL)
1446 : return;
1447 :
1448 86 : master = (*thread)->master;
1449 :
1450 86 : frrtrace(9, frr_libfrr, event_cancel, master, (*thread)->xref->funcname,
1451 : (*thread)->xref->xref.file, (*thread)->xref->xref.line, NULL,
1452 : (*thread)->u.fd, (*thread)->u.val, (*thread)->arg,
1453 : (*thread)->u.sands.tv_sec);
1454 :
1455 86 : assert(master->owner == pthread_self());
1456 :
1457 86 : frr_with_mutex (&master->mtx) {
1458 86 : struct cancel_req *cr =
1459 86 : XCALLOC(MTYPE_TMP, sizeof(struct cancel_req));
1460 86 : cr->thread = *thread;
1461 86 : listnode_add(master->cancel_req, cr);
1462 86 : do_event_cancel(master);
1463 :
1464 86 : *thread = NULL;
1465 : }
1466 : }
1467 :
1468 : /**
1469 : * Asynchronous cancellation.
1470 : *
1471 : * Called with either a struct event ** or void * to an event argument,
1472 : * this function posts the correct cancellation request and blocks until it is
1473 : * serviced.
1474 : *
1475 : * If the thread is currently running, execution blocks until it completes.
1476 : *
1477 : * The last two parameters are mutually exclusive, i.e. if you pass one the
1478 : * other must be NULL.
1479 : *
1480 : * When the cancellation procedure executes on the target event_master, the
1481 : * thread * provided is checked for nullity. If it is null, the thread is
1482 : * assumed to no longer exist and the cancellation request is a no-op. Thus
1483 : * users of this API must pass a back-reference when scheduling the original
1484 : * task.
1485 : *
1486 : * MT-Safe
1487 : *
1488 : * @param master the thread master with the relevant event / task
1489 : * @param thread pointer to thread to cancel
1490 : * @param eventobj the event
1491 : */
1492 68 : void event_cancel_async(struct event_loop *master, struct event **thread,
1493 : void *eventobj)
1494 : {
1495 68 : assert(!(thread && eventobj) && (thread || eventobj));
1496 :
1497 68 : if (thread && *thread)
1498 : frrtrace(9, frr_libfrr, event_cancel_async, master,
1499 : (*thread)->xref->funcname, (*thread)->xref->xref.file,
1500 : (*thread)->xref->xref.line, NULL, (*thread)->u.fd,
1501 : (*thread)->u.val, (*thread)->arg,
1502 : (*thread)->u.sands.tv_sec);
1503 : else
1504 68 : frrtrace(9, frr_libfrr, event_cancel_async, master, NULL, NULL,
1505 : 0, NULL, 0, 0, eventobj, 0);
1506 :
1507 68 : assert(master->owner != pthread_self());
1508 :
1509 136 : frr_with_mutex (&master->mtx) {
1510 68 : master->canceled = false;
1511 :
1512 68 : if (thread) {
1513 68 : struct cancel_req *cr =
1514 68 : XCALLOC(MTYPE_TMP, sizeof(struct cancel_req));
1515 68 : cr->threadref = thread;
1516 68 : listnode_add(master->cancel_req, cr);
1517 0 : } else if (eventobj) {
1518 0 : struct cancel_req *cr =
1519 0 : XCALLOC(MTYPE_TMP, sizeof(struct cancel_req));
1520 0 : cr->eventobj = eventobj;
1521 0 : listnode_add(master->cancel_req, cr);
1522 : }
1523 68 : AWAKEN(master);
1524 :
1525 136 : while (!master->canceled)
1526 68 : pthread_cond_wait(&master->cancel_cond, &master->mtx);
1527 : }
1528 :
1529 68 : if (thread)
1530 68 : *thread = NULL;
1531 68 : }
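/* Illustrative sketch (not part of this file; other_loop and t_read are
 * hypothetical).  Tearing down a task that lives on another pthread's loop:
 *
 *	event_cancel_async(other_loop, &t_read, NULL);
 *
 * The call returns only after the owning pthread has serviced the request;
 * because the scheduling call stored a back-reference in t_read, the owner
 * clears it when the task runs or is cancelled, and the nullity check in
 * do_event_cancel() turns a stale request into a no-op.
 */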
1532 : /* ------------------------------------------------------------------------- */
1533 :
1534 6744 : static struct timeval *thread_timer_wait(struct event_timer_list_head *timers,
1535 : struct timeval *timer_val)
1536 : {
1537 6744 : if (!event_timer_list_count(timers))
1538 : return NULL;
1539 :
1540 6446 : struct event *next_timer = event_timer_list_first(timers);
1541 :
1542 6446 : monotime_until(&next_timer->u.sands, timer_val);
1543 6446 : return timer_val;
1544 : }
1545 :
1546 595 : static struct event *thread_run(struct event_loop *m, struct event *thread,
1547 : struct event *fetch)
1548 : {
1549 595 : *fetch = *thread;
1550 595 : thread_add_unuse(m, thread);
1551 595 : return fetch;
1552 : }
1553 :
1554 368 : static int thread_process_io_helper(struct event_loop *m, struct event *thread,
1555 : short state, short actual_state, int pos)
1556 : {
1557 368 : struct event **thread_array;
1558 :
1559 : /*
1560 : * poll() clears the .events field, but the pollfd array we
1561 : * pass to poll() is a copy of the one used to schedule threads.
1562 : * We need to synchronize state between the two here by applying
1563 : * the same changes poll() made on the copy of the "real" pollfd
1564 : * array.
1565 : *
1566 : * This cleans up a possible infinite loop where we refuse
1567 : * to respond to a poll event but poll is insistent that
1568 : * we should.
1569 : */
1570 368 : m->handler.pfds[pos].events &= ~(state);
1571 :
1572 368 : if (!thread) {
1573 0 : if ((actual_state & (POLLHUP|POLLIN)) != POLLHUP)
1574 0 : flog_err(EC_LIB_NO_THREAD,
1575 : "Attempting to process an I/O event but for fd: %d(%d) no thread to handle this!",
1576 : m->handler.pfds[pos].fd, actual_state);
1577 0 : return 0;
1578 : }
1579 :
1580 368 : if (thread->type == EVENT_READ)
1581 308 : thread_array = m->read;
1582 : else
1583 60 : thread_array = m->write;
1584 :
1585 368 : thread_array[thread->u.fd] = NULL;
1586 368 : event_list_add_tail(&m->ready, thread);
1587 368 : thread->type = EVENT_READY;
1588 :
1589 368 : return 1;
1590 : }
1591 :
1592 1021 : static inline void thread_process_io_inner_loop(struct event_loop *m,
1593 : unsigned int num,
1594 : struct pollfd *pfds, nfds_t *i,
1595 : uint32_t *ready)
1596 : {
1597 : /* no event for current fd? immediately continue */
1598 1021 : if (pfds[*i].revents == 0)
1599 : return;
1600 :
1601 384 : *ready = *ready + 1;
1602 :
1603 : /*
1604 : * Unless someone has called event_cancel from another
1605 : * pthread, the only thing that could have changed in
1606 : * m->handler.pfds while we were asleep is the .events
1607 : * field in a given pollfd. Barring event_cancel() that
1608 : * value should be a superset of the values we have in our
1609 : * copy, so there's no need to update it. Similarily,
1610 : * barring deletion, the fd should still be a valid index
1611 : * into the master's pfds.
1612 : *
1613 : * We are including POLLERR here to do a READ event
1614 : * this is because the read should fail and the
1615 : * read function should handle it appropriately
1616 : */
1617 384 : if (pfds[*i].revents & (POLLIN | POLLHUP | POLLERR)) {
1618 308 : thread_process_io_helper(m, m->read[pfds[*i].fd], POLLIN,
1619 : pfds[*i].revents, *i);
1620 : }
1621 384 : if (pfds[*i].revents & POLLOUT)
1622 60 : thread_process_io_helper(m, m->write[pfds[*i].fd], POLLOUT,
1623 : pfds[*i].revents, *i);
1624 :
1625 : /*
1626 : * if one of our file descriptors is garbage, remove the same
1627 : * from both pfds + update sizes and index
1628 : */
1629 384 : if (pfds[*i].revents & POLLNVAL) {
1630 17 : memmove(m->handler.pfds + *i, m->handler.pfds + *i + 1,
1631 17 : (m->handler.pfdcount - *i - 1) * sizeof(struct pollfd));
1632 17 : m->handler.pfdcount--;
1633 17 : m->handler.pfds[m->handler.pfdcount].fd = 0;
1634 17 : m->handler.pfds[m->handler.pfdcount].events = 0;
1635 :
1636 17 : memmove(pfds + *i, pfds + *i + 1,
1637 17 : (m->handler.copycount - *i - 1) * sizeof(struct pollfd));
1638 17 : m->handler.copycount--;
1639 17 : m->handler.copy[m->handler.copycount].fd = 0;
1640 17 : m->handler.copy[m->handler.copycount].events = 0;
1641 :
1642 17 : *i = *i - 1;
1643 : }
1644 : }
1645 :
1646 : /**
1647 : * Process I/O events.
1648 : *
1649 : * Walks through file descriptor array looking for those pollfds whose .revents
1650 : * field has something interesting. Deletes any invalid file descriptors.
1651 : *
1652 : * Try to impart some impartiality to handling of io. The event
1653 : * system will cycle through the fd's available for io
1654 : * giving each one a chance to go first.
1655 : *
1656 : * @param m the thread master
1657 : * @param num the number of active file descriptors (return value of poll())
1658 : */
1659 322 : static void thread_process_io(struct event_loop *m, unsigned int num)
1660 : {
1661 322 : unsigned int ready = 0;
1662 322 : struct pollfd *pfds = m->handler.copy;
1663 322 : nfds_t i, last_read = m->last_read % m->handler.copycount;
1664 :
1665 1076 : for (i = last_read; i < m->handler.copycount && ready < num; ++i)
1666 754 : thread_process_io_inner_loop(m, num, pfds, &i, &ready);
1667 :
1668 589 : for (i = 0; i < last_read && ready < num; ++i)
1669 267 : thread_process_io_inner_loop(m, num, pfds, &i, &ready);
1670 :
1671 322 : m->last_read++;
1672 322 : }
1673 :
1674 : /* Add all timers that have popped to the ready list. */
1675 6854 : static unsigned int thread_process_timers(struct event_loop *m,
1676 : struct timeval *timenow)
1677 : {
1678 6854 : struct timeval prev = *timenow;
1679 6854 : bool displayed = false;
1680 6854 : struct event *thread;
1681 6854 : unsigned int ready = 0;
1682 :
1683 6947 : while ((thread = event_timer_list_first(&m->timer))) {
1684 6618 : if (timercmp(timenow, &thread->u.sands, <))
1685 : break;
1686 93 : prev = thread->u.sands;
1687 93 : prev.tv_sec += 4;
1688 : /*
1689 : * If the timer would have popped 4 seconds in the
1690 : * past then we are in a situation where we are
1691 : * really getting behind on handling of events.
1692 : * Let's log it and do the right thing with it.
1693 : */
1694 93 : if (timercmp(timenow, &prev, >)) {
1695 0 : atomic_fetch_add_explicit(
1696 : &thread->hist->total_starv_warn, 1,
1697 : memory_order_seq_cst);
1698 0 : if (!displayed && !thread->ignore_timer_late) {
1699 0 : flog_warn(
1700 : EC_LIB_STARVE_THREAD,
1701 : "Thread Starvation: %pTHD was scheduled to pop greater than 4s ago",
1702 : thread);
1703 0 : displayed = true;
1704 : }
1705 : }
1706 :
1707 93 : event_timer_list_pop(&m->timer);
1708 93 : thread->type = EVENT_READY;
1709 93 : event_list_add_tail(&m->ready, thread);
1710 93 : ready++;
1711 : }
1712 :
1713 6854 : return ready;
1714 : }
1715 :
1716 : /* process a list en masse, e.g. for event thread lists */
1717 6858 : static unsigned int thread_process(struct event_list_head *list)
1718 : {
1719 6858 : struct event *thread;
1720 6858 : unsigned int ready = 0;
1721 :
1722 6993 : while ((thread = event_list_pop(list))) {
1723 135 : thread->type = EVENT_READY;
1724 135 : event_list_add_tail(&thread->master->ready, thread);
1725 135 : ready++;
1726 : }
1727 6858 : return ready;
1728 : }
1729 :
1730 :
1731 : /* Fetch next ready thread. */
1732 597 : struct event *event_fetch(struct event_loop *m, struct event *fetch)
1733 : {
1734 597 : struct event *thread = NULL;
1735 597 : struct timeval now;
1736 597 : struct timeval zerotime = {0, 0};
1737 597 : struct timeval tv;
1738 597 : struct timeval *tw = NULL;
1739 597 : bool eintr_p = false;
1740 597 : int num = 0;
1741 :
1742 7455 : do {
1743 : /* Handle signals if any */
1744 7455 : if (m->handle_signals)
1745 6971 : frr_sigevent_process();
1746 :
1747 7453 : pthread_mutex_lock(&m->mtx);
1748 :
1749 : /* Process any pending cancellation requests */
1750 7453 : do_event_cancel(m);
1751 :
1752 : /*
1753 : * Attempt to flush ready queue before going into poll().
1754 : * This is performance-critical. Think twice before modifying.
1755 : */
1756 7453 : if ((thread = event_list_pop(&m->ready))) {
1757 595 : fetch = thread_run(m, thread, fetch);
1758 595 : if (fetch->ref)
1759 567 : *fetch->ref = NULL;
1760 595 : pthread_mutex_unlock(&m->mtx);
1761 595 : if (!m->ready_run_loop)
1762 486 : GETRUSAGE(&m->last_getrusage);
1763 595 : m->ready_run_loop = true;
1764 595 : break;
1765 : }
1766 :
1767 6858 : m->ready_run_loop = false;
1768 : /* otherwise, tick through scheduling sequence */
1769 :
1770 : /*
1771 : * Post events to ready queue. This must come before the
1772 : * following block since events should occur immediately
1773 : */
1774 6858 : thread_process(&m->event);
1775 :
1776 : /*
1777 : * If there are no tasks on the ready queue, we will poll()
1778 : * until a timer expires or we receive I/O, whichever comes
1779 : * first. The strategy for doing this is:
1780 : *
1781 : * - If there are events pending, set the poll() timeout to zero
1782 : * - If there are no events pending, but there are timers
1783 : * pending, set the timeout to the smallest remaining time on
1784 : * any timer.
1785 : * - If there are neither timers nor events pending, but there
1786 : * are file descriptors pending, block indefinitely in poll()
1787 : * - If nothing is pending, it's time for the application to die
1788 : *
1789 : * In every case except the last, we need to hit poll() at least
1790 : * once per loop to avoid starvation by events
1791 : */
1792 6858 : if (!event_list_count(&m->ready))
1793 6744 : tw = thread_timer_wait(&m->timer, &tv);
1794 :
1795 6858 : if (event_list_count(&m->ready) ||
1796 6446 : (tw && !timercmp(tw, &zerotime, >)))
1797 : tw = &zerotime;
1798 :
1799 298 : if (!tw && m->handler.pfdcount == 0) { /* die */
1800 0 : pthread_mutex_unlock(&m->mtx);
1801 0 : fetch = NULL;
1802 0 : break;
1803 : }
1804 :
1805 : /*
1806 : * Copy pollfd array + # active pollfds in it. Not necessary to
1807 : * copy the array size as this is fixed.
1808 : */
1809 6858 : m->handler.copycount = m->handler.pfdcount;
1810 6858 : memcpy(m->handler.copy, m->handler.pfds,
1811 : m->handler.copycount * sizeof(struct pollfd));
1812 :
1813 6858 : pthread_mutex_unlock(&m->mtx);
1814 : {
1815 6858 : eintr_p = false;
1816 6858 : num = fd_poll(m, tw, &eintr_p);
1817 : }
1818 6858 : pthread_mutex_lock(&m->mtx);
1819 :
1820 : /* Handle any errors received in poll() */
1821 6858 : if (num < 0) {
1822 4 : if (eintr_p) {
1823 4 : pthread_mutex_unlock(&m->mtx);
1824 : /* loop around to signal handler */
1825 4 : continue;
1826 : }
1827 :
1828 : /* else die */
1829 0 : flog_err(EC_LIB_SYSTEM_CALL, "poll() error: %s",
1830 : safe_strerror(errno));
1831 0 : pthread_mutex_unlock(&m->mtx);
1832 0 : fetch = NULL;
1833 0 : break;
1834 : }
1835 :
1836 : /* Post timers to ready queue. */
1837 6854 : monotime(&now);
1838 6854 : thread_process_timers(m, &now);
1839 :
1840 : /* Post I/O to ready queue. */
1841 6854 : if (num > 0)
1842 322 : thread_process_io(m, num);
1843 :
1844 6854 : pthread_mutex_unlock(&m->mtx);
1845 :
1846 6858 : } while (!thread && m->spin);
1847 :
1848 595 : return fetch;
1849 : }
1850 :
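 : /*
 :  * Usage sketch (editor's addition, not part of the original source): a
 :  * daemon's main loop drives the scheduler by fetching one ready task at
 :  * a time and dispatching it; frr_run() in libfrr does essentially this,
 :  * with "master" standing in for the daemon's event loop:
 :  *
 :  *	struct event thread;
 :  *
 :  *	while (event_fetch(master, &thread))
 :  *		event_call(&thread);
 :  *
 :  * event_fetch() returns NULL when nothing is left to run (the "die"
 :  * case above) or on a fatal poll() error, so this loop also bounds the
 :  * daemon's lifetime.
 :  */
 :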
1851 626 : static unsigned long timeval_elapsed(struct timeval a, struct timeval b)
1852 : {
1853 626 : return (((a.tv_sec - b.tv_sec) * TIMER_SECOND_MICRO)
1854 626 : + (a.tv_usec - b.tv_usec));
1855 : }
1856 :
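 : /*
 :  * Worked example (editor's addition):
 :  *   timeval_elapsed({5, 200000}, {3, 900000})
 :  *     = (5 - 3) * TIMER_SECOND_MICRO + (200000 - 900000)
 :  *     = 2000000 - 700000 = 1300000 microseconds (1.3s)
 :  */
 :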
1857 626 : unsigned long event_consumed_time(RUSAGE_T *now, RUSAGE_T *start,
1858 : unsigned long *cputime)
1859 : {
1860 : #ifdef HAVE_CLOCK_THREAD_CPUTIME_ID
1861 :
1862 : #ifdef __FreeBSD__
    1863 :         /*
    1864 :          * FreeBSD appears to have an issue where two clock_gettime()
    1865 :          * calls with CLOCK_THREAD_CPUTIME_ID made in quick succession
    1866 :          * occasionally return a "now" time that is before the start
    1867 :          * time.  The subtraction then wraps to a very large value and
    1868 :          * FRR ends up reporting spurious CPU HOGs.
    1869 :          *
    1870 :          * Work around this by detecting the inversion and clamping
    1871 :          * "now" so that the computed elapsed time can never be
    1872 :          * negative.
    1873 :          */
1874 : if (start->cpu.tv_sec == now->cpu.tv_sec &&
1875 : start->cpu.tv_nsec > now->cpu.tv_nsec)
1876 : now->cpu.tv_nsec = start->cpu.tv_nsec + 1;
1877 : else if (start->cpu.tv_sec > now->cpu.tv_sec) {
1878 : now->cpu.tv_sec = start->cpu.tv_sec;
1879 : now->cpu.tv_nsec = start->cpu.tv_nsec + 1;
1880 : }
1881 : #endif
1882 626 : *cputime = (now->cpu.tv_sec - start->cpu.tv_sec) * TIMER_SECOND_MICRO
1883 626 : + (now->cpu.tv_nsec - start->cpu.tv_nsec) / 1000;
1884 : #else
1885 : /* This is 'user + sys' time. */
1886 : *cputime = timeval_elapsed(now->cpu.ru_utime, start->cpu.ru_utime)
1887 : + timeval_elapsed(now->cpu.ru_stime, start->cpu.ru_stime);
1888 : #endif
1889 626 : return timeval_elapsed(now->real, start->real);
1890 : }
1891 :
1892 : /*
    1893 :  * A task should aim to yield once it has been running for its yield
    1894 :  * time (thread->yield, which defaults to EVENT_YIELD_TIME_SLOT).
    1895 :  * Note: we are using real (wall clock) time for this calculation.
    1896 :  * It could be argued that CPU time may make more sense in certain
    1897 :  * contexts.  The things to consider are whether the thread may have
    1898 :  * blocked (in which case wall time increases, but CPU time does not),
    1899 :  * or whether the system is heavily loaded with other processes competing
    1900 :  * for CPU time.  On balance, wall clock time seems to make sense.
    1901 :  * It also has the benefit that reading the monotonic clock is cheaper
    1902 :  * than calling getrusage.
1903 : */
1904 57 : int event_should_yield(struct event *thread)
1905 : {
1906 57 : int result;
1907 :
1908 57 : frr_with_mutex (&thread->mtx) {
1909 57 : result = monotime_since(&thread->real, NULL)
1910 57 : > (int64_t)thread->yield;
1911 : }
1912 57 : return result;
1913 : }
1914 :
1915 6 : void event_set_yield_time(struct event *thread, unsigned long yield_time)
1916 : {
1917 6 : frr_with_mutex (&thread->mtx) {
1918 6 : thread->yield = yield_time;
1919 : }
1920 6 : }
1921 :
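 : /*
 :  * Usage sketch (editor's addition): a long-running job can cooperate
 :  * with the scheduler by checking event_should_yield() and rescheduling
 :  * itself rather than monopolizing the event loop.  work_queue and
 :  * process_one_item() below are hypothetical:
 :  *
 :  *	static void work_drain(struct event *t)
 :  *	{
 :  *		struct work_queue *wq = EVENT_ARG(t);
 :  *
 :  *		while (process_one_item(wq)) {
 :  *			if (event_should_yield(t)) {
 :  *				event_add_event(t->master, work_drain, wq,
 :  *						0, NULL);
 :  *				return;
 :  *			}
 :  *		}
 :  *	}
 :  *
 :  * event_set_yield_time() can stretch a task's slot when longer bursts
 :  * are acceptable.
 :  */
 :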
1922 1145 : void event_getrusage(RUSAGE_T *r)
1923 : {
1924 1145 : monotime(&r->real);
1925 1145 : if (!cputime_enabled) {
1926 0 : memset(&r->cpu, 0, sizeof(r->cpu));
1927 0 : return;
1928 : }
1929 :
1930 : #ifdef HAVE_CLOCK_THREAD_CPUTIME_ID
    1931 :         /* CLOCK_THREAD_CPUTIME_ID is not currently serviced by Linux's vDSO
    1932 :          * (so this is a real syscall), but maybe it will be at some point in
    1933 :          * the future?  */
1934 1145 : clock_gettime(CLOCK_THREAD_CPUTIME_ID, &r->cpu);
1935 : #else /* !HAVE_CLOCK_THREAD_CPUTIME_ID */
1936 : #if defined RUSAGE_THREAD
1937 : #define FRR_RUSAGE RUSAGE_THREAD
1938 : #else
1939 : #define FRR_RUSAGE RUSAGE_SELF
1940 : #endif
1941 : getrusage(FRR_RUSAGE, &(r->cpu));
1942 : #endif
1943 : }
1944 :
1945 : /*
1946 : * Call a thread.
1947 : *
    1948 :  * This function atomically updates the thread's usage history. At present
    1949 :  * this is the only place where usage history is written. Nevertheless, the
    1950 :  * code is written so that additional writers can be introduced later without
    1951 :  * changes here, provided those writers atomically perform only the
    1952 :  * operations done here, i.e. updating the total and maximum times. In
    1953 :  * particular, the maximum real and cpu times must be monotonically increasing
    1954 :  * or this code is not correct.
1955 : */
1956 595 : void event_call(struct event *thread)
1957 : {
1958 595 : RUSAGE_T before, after;
1959 595 : bool suppress_warnings = EVENT_ARG(thread);
1960 :
1961 : /* if the thread being called is the CLI, it may change cputime_enabled
1962 : * ("service cputime-stats" command), which can result in nonsensical
1963 : * and very confusing warnings
1964 : */
1965 595 : bool cputime_enabled_here = cputime_enabled;
1966 :
1967 595 : if (thread->master->ready_run_loop)
1968 595 : before = thread->master->last_getrusage;
1969 : else
1970 0 : GETRUSAGE(&before);
1971 :
1972 595 : thread->real = before.real;
1973 :
1974 595 : frrtrace(9, frr_libfrr, event_call, thread->master,
1975 : thread->xref->funcname, thread->xref->xref.file,
1976 : thread->xref->xref.line, NULL, thread->u.fd, thread->u.val,
1977 : thread->arg, thread->u.sands.tv_sec);
1978 :
1979 595 : pthread_setspecific(thread_current, thread);
1980 593 : (*thread->func)(thread);
1981 593 : pthread_setspecific(thread_current, NULL);
1982 :
1983 593 : GETRUSAGE(&after);
1984 593 : thread->master->last_getrusage = after;
1985 :
1986 593 : unsigned long walltime, cputime;
1987 593 : unsigned long exp;
1988 :
1989 593 : walltime = event_consumed_time(&after, &before, &cputime);
1990 :
1991 : /* update walltime */
1992 593 : atomic_fetch_add_explicit(&thread->hist->real.total, walltime,
1993 : memory_order_seq_cst);
1994 593 : exp = atomic_load_explicit(&thread->hist->real.max,
1995 : memory_order_seq_cst);
1996 593 : while (exp < walltime
1997 593 : && !atomic_compare_exchange_weak_explicit(
1998 : &thread->hist->real.max, &exp, walltime,
1999 : memory_order_seq_cst, memory_order_seq_cst))
2000 : ;
2001 :
2002 593 : if (cputime_enabled_here && cputime_enabled) {
2003 : /* update cputime */
2004 593 : atomic_fetch_add_explicit(&thread->hist->cpu.total, cputime,
2005 : memory_order_seq_cst);
2006 593 : exp = atomic_load_explicit(&thread->hist->cpu.max,
2007 : memory_order_seq_cst);
2008 593 : while (exp < cputime
2009 593 : && !atomic_compare_exchange_weak_explicit(
2010 : &thread->hist->cpu.max, &exp, cputime,
2011 : memory_order_seq_cst, memory_order_seq_cst))
2012 : ;
2013 : }
2014 :
2015 593 : atomic_fetch_add_explicit(&thread->hist->total_calls, 1,
2016 : memory_order_seq_cst);
2017 593 : atomic_fetch_or_explicit(&thread->hist->types, 1 << thread->add_type,
2018 : memory_order_seq_cst);
2019 :
2020 593 : if (suppress_warnings)
2021 512 : return;
2022 :
2023 81 : if (cputime_enabled_here && cputime_enabled && cputime_threshold
2024 81 : && cputime > cputime_threshold) {
    2025 :                 /*
    2026 :                  * We have a CPU hog on our hands: the CPU time this task
    2027 :                  * spent doing actual work (not sleeping) exceeds the
    2028 :                  * configured threshold (5 seconds by default).  Warn about
    2029 :                  * it now, so we are aware this is yet another task to fix.
    2030 :                  */
2031 0 : atomic_fetch_add_explicit(&thread->hist->total_cpu_warn,
2032 : 1, memory_order_seq_cst);
2033 0 : flog_warn(
2034 : EC_LIB_SLOW_THREAD_CPU,
2035 : "CPU HOG: task %s (%lx) ran for %lums (cpu time %lums)",
2036 : thread->xref->funcname, (unsigned long)thread->func,
2037 : walltime / 1000, cputime / 1000);
2038 :
2039 81 : } else if (walltime_threshold && walltime > walltime_threshold) {
    2040 :                 /*
    2041 :                  * The wall-clock runtime of this task exceeds the configured
    2042 :                  * threshold (5 seconds by default), but its CPU time does not.
    2043 :                  * Warn about it, since this may indicate a scheduling problem.
    2044 :                  */
2045 0 : atomic_fetch_add_explicit(&thread->hist->total_wall_warn,
2046 : 1, memory_order_seq_cst);
2047 81 : flog_warn(
2048 : EC_LIB_SLOW_THREAD_WALL,
2049 : "STARVATION: task %s (%lx) ran for %lums (cpu time %lums)",
2050 : thread->xref->funcname, (unsigned long)thread->func,
2051 : walltime / 1000, cputime / 1000);
2052 : }
2053 : }
2054 :
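 : /*
 :  * Note (editor's addition): the two warnings above are gated by the
 :  * globals cputime_threshold and walltime_threshold (compared against
 :  * microsecond values) and by cputime_enabled, which the CLI toggles via
 :  * "service cputime-stats".  As the checks show, setting a threshold to
 :  * zero disables the corresponding warning.
 :  */
 :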
2055 : /* Execute thread */
2056 0 : void _event_execute(const struct xref_eventsched *xref, struct event_loop *m,
2057 : void (*func)(struct event *), void *arg, int val,
2058 : struct event **eref)
2059 : {
2060 0 : struct event *thread;
2061 :
    2062 :         /* Cancel any existing scheduled task.  TODO: nice to do in one lock cycle. */
2063 0 : if (eref)
2064 0 : event_cancel(eref);
2065 :
2066 : /* Get or allocate new thread to execute. */
2067 0 : frr_with_mutex (&m->mtx) {
2068 0 : thread = thread_get(m, EVENT_EVENT, func, arg, xref);
2069 :
2070 : /* Set its event value. */
2071 0 : frr_with_mutex (&thread->mtx) {
2072 0 : thread->add_type = EVENT_EXECUTE;
2073 0 : thread->u.val = val;
2074 0 : thread->ref = &thread;
2075 : }
2076 : }
2077 :
2078 : /* Execute thread doing all accounting. */
2079 0 : event_call(thread);
2080 :
2081 : /* Give back or free thread. */
2082 0 : thread_add_unuse(m, thread);
2083 0 : }
2084 :
2085 : /* Debug signal mask - if 'sigs' is NULL, use current effective mask. */
2086 0 : void debug_signals(const sigset_t *sigs)
2087 : {
2088 0 : int i, found;
2089 0 : sigset_t tmpsigs;
2090 0 : char buf[300];
2091 :
2092 : /*
    2093 :          * We're only looking at the non-realtime signals here, so we need an
    2094 :          * upper bound.  Platform differences mean there is no single correct
    2095 :          * value, so just pick a reasonable one.
2096 : */
2097 : #if defined SIGRTMIN
2098 : # define LAST_SIGNAL SIGRTMIN
2099 : #else
2100 : # define LAST_SIGNAL 32
2101 : #endif
2102 :
2103 :
2104 0 : if (sigs == NULL) {
2105 0 : sigemptyset(&tmpsigs);
2106 0 : pthread_sigmask(SIG_BLOCK, NULL, &tmpsigs);
2107 0 : sigs = &tmpsigs;
2108 : }
2109 :
2110 0 : found = 0;
2111 0 : buf[0] = '\0';
2112 :
2113 0 : for (i = 0; i < LAST_SIGNAL; i++) {
2114 0 : char tmp[20];
2115 :
2116 0 : if (sigismember(sigs, i) > 0) {
2117 0 : if (found > 0)
2118 0 : strlcat(buf, ",", sizeof(buf));
2119 0 : snprintf(tmp, sizeof(tmp), "%d", i);
2120 0 : strlcat(buf, tmp, sizeof(buf));
2121 0 : found++;
2122 : }
2123 : }
2124 :
2125 0 : if (found == 0)
2126 0 : snprintf(buf, sizeof(buf), "<none>");
2127 :
2128 0 : zlog_debug("%s: %s", __func__, buf);
2129 0 : }
2130 :
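 : /*
 :  * Example (editor's addition): log the calling thread's effective
 :  * signal mask while debugging signal delivery problems:
 :  *
 :  *	debug_signals(NULL);
 :  *
 :  * Passing an explicit sigset_t logs that set instead.
 :  */
 :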
2131 0 : static ssize_t printfrr_thread_dbg(struct fbuf *buf, struct printfrr_eargs *ea,
2132 : const struct event *thread)
2133 : {
2134 0 : static const char *const types[] = {
2135 : [EVENT_READ] = "read", [EVENT_WRITE] = "write",
2136 : [EVENT_TIMER] = "timer", [EVENT_EVENT] = "event",
2137 : [EVENT_READY] = "ready", [EVENT_UNUSED] = "unused",
2138 : [EVENT_EXECUTE] = "exec",
2139 : };
2140 0 : ssize_t rv = 0;
2141 0 : char info[16] = "";
2142 :
2143 0 : if (!thread)
2144 0 : return bputs(buf, "{(thread *)NULL}");
2145 :
2146 0 : rv += bprintfrr(buf, "{(thread *)%p arg=%p", thread, thread->arg);
2147 :
2148 0 : if (thread->type < array_size(types) && types[thread->type])
2149 0 : rv += bprintfrr(buf, " %-6s", types[thread->type]);
2150 : else
2151 0 : rv += bprintfrr(buf, " INVALID(%u)", thread->type);
2152 :
2153 0 : switch (thread->type) {
2154 0 : case EVENT_READ:
2155 : case EVENT_WRITE:
2156 0 : snprintfrr(info, sizeof(info), "fd=%d", thread->u.fd);
2157 0 : break;
2158 :
2159 0 : case EVENT_TIMER:
2160 0 : snprintfrr(info, sizeof(info), "r=%pTVMud", &thread->u.sands);
2161 0 : break;
2162 : case EVENT_READY:
2163 : case EVENT_EVENT:
2164 : case EVENT_UNUSED:
2165 : case EVENT_EXECUTE:
2166 : break;
2167 : }
2168 :
2169 0 : rv += bprintfrr(buf, " %-12s %s() %s from %s:%d}", info,
2170 0 : thread->xref->funcname, thread->xref->dest,
2171 0 : thread->xref->xref.file, thread->xref->xref.line);
2172 0 : return rv;
2173 : }
2174 :
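 : /*
 :  * Usage sketch (editor's addition): with the "TH" extension registered
 :  * below, a task can be logged directly through printfrr; the trailing
 :  * 'D' selects the verbose debug form used by the starvation warning
 :  * earlier in this file:
 :  *
 :  *	zlog_debug("about to run %pTHD", thread);
 :  *
 :  * Plain "%pTH" formats the task's timer deadline instead.
 :  */
 :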
2175 4 : printfrr_ext_autoreg_p("TH", printfrr_thread);
2176 0 : static ssize_t printfrr_thread(struct fbuf *buf, struct printfrr_eargs *ea,
2177 : const void *ptr)
2178 : {
2179 0 : const struct event *thread = ptr;
2180 0 : struct timespec remain = {};
2181 :
2182 0 : if (ea->fmt[0] == 'D') {
2183 0 : ea->fmt++;
2184 0 : return printfrr_thread_dbg(buf, ea, thread);
2185 : }
2186 :
2187 0 : if (!thread) {
2188 : /* need to jump over time formatting flag characters in the
2189 : * input format string, i.e. adjust ea->fmt!
2190 : */
2191 0 : printfrr_time(buf, ea, &remain,
2192 : TIMEFMT_TIMER_DEADLINE | TIMEFMT_SKIP);
2193 0 : return bputch(buf, '-');
2194 : }
2195 :
2196 0 : TIMEVAL_TO_TIMESPEC(&thread->u.sands, &remain);
2197 0 : return printfrr_time(buf, ea, &remain, TIMEFMT_TIMER_DEADLINE);
2198 : }