-
Notifications
You must be signed in to change notification settings - Fork 243
Expand file tree
/
Copy pathrandom-syscall.c
More file actions
3193 lines (2955 loc) · 128 KB
/
Copy pathrandom-syscall.c
File metadata and controls
3193 lines (2955 loc) · 128 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
/*
* Call a single random syscall with random args.
*/
#include <errno.h>
#include <signal.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
#include <unistd.h>
#include <sys/types.h>
#include <sys/wait.h>
#include "arch.h" // biarch
#include "arg-decoder.h"
#include "child.h"
#include "cmp_hints.h"
#include "cred_throttle.h"
#include "debug.h"
#include "fd.h"
#include "kcov.h"
#include "locks.h"
#include "minicorpus.h"
#include "params.h"
#include "pids.h"
#include "pre_crash_ring.h"
#include "prop_ring.h"
#include "random.h"
#include "rnd.h"
#include "sequence.h"
#include "shm.h"
#include "signals.h"
#include "sanitise.h"
#include "stats.h"
#include "stats_ring.h"
#include "strategy.h"
#include "syscall.h"
#include "syscall_record.h"
#include "tables.h"
#include "trinity.h"
#include "utils.h"
/* The per-pending-index success counter in kcov_shared is sized off
* REEXEC_PENDING_PICK_HIST_NR (include/kcov.h); the per-call attribution
* buffer it indexes is sized off MAX_REEXEC_PENDING (include/cmp_hints.h).
* They MUST stay equal -- a wider counter under-uses the kcov_shm field
* and a narrower one would let the clamped index drop the last slots'
* success signal on the floor. kcov.h does not include cmp_hints.h
* (to stay self-contained), so the parity check lives here, where both
* headers are in scope. */
_Static_assert(REEXEC_PENDING_PICK_HIST_NR == MAX_REEXEC_PENDING,
"REEXEC_PENDING_PICK_HIST_NR must equal MAX_REEXEC_PENDING");
/*
* Compression factor for the frontier-weighted acceptance denominator.
* See the gate in set_syscall_nr_coverage_frontier() for the rationale.
*/
#define FRONTIER_SOFT_SCALE 16
/*
* Acceptance-weight scale for the cold/untried-syscall fallback path in
* set_syscall_nr_coverage_frontier(). Engaged when the frontier ring
* is silent (max_weight <= 2) so the picker has a per-syscall signal
* to steer on instead of degenerating to plain uniform draw -- see the
* fallback gate for the full rationale.
*
* Sized at 256 to give the integer-divide inverse-productivity transform
* (SCALE - floor(SCALE * edges / calls)) sub-percent discrimination: at
* the previous SCALE=16, any syscall productive at < 6.25% (= 1/16) of
* its calls floored the divide to 0 and collapsed to MAX, indistinguishable
* from a never-tried slot. At SCALE=256 the same divide resolves down
* to ~0.4%/step, so syscalls with even a handful of productive calls in
* the high-thousands range no longer pin at the cold ceiling. 256 is
* also the Q8.8 unit used by adapt_budget's mult table -- staying on a
* power-of-two keeps the rnd_modulo_u32(SCALE + 1) draw in the same
* Lemire fast-path the soft-max path already uses.
*/
#define FRONTIER_COLD_SCALE 256
/*
 * Integer floor(log2(x)) of an unsigned long.
 *
 * Returns 0 for x == 0 (the builtin's result is undefined for a zero
 * argument, so that case is filtered out first) as well as for x == 1.
 *
 * The index of the top bit is derived from the width of unsigned long
 * instead of being hard-coded as 63, so the result is also correct on
 * ILP32 targets where long is 32 bits wide.
 */
static inline unsigned ilog2_ul(unsigned long x)
{
	const unsigned top_bit = (unsigned)(sizeof(x) * __CHAR_BIT__) - 1U;

	if (x == 0)
		return 0;

	return top_bit - (unsigned)__builtin_clzl(x);
}
/*
* This function decides if we're going to be doing a 32bit or 64bit syscall.
* There are various factors involved here, from whether we're on a 32-bit only arch
* to 'we asked to do a 32bit only syscall' and more.. Hairy.
*/
/*
* Biarch-only: pick which syscall table this call uses, refresh the
* caller's per-child active_syscalls pointer, and return do32. Uniarch
* builds bypass this entirely — child->active_syscalls is set once at
* init time to shm->active_syscalls and never re-evaluated.
*
* *nr_syscalls_out receives the current shm->nr_active_*bit_syscalls
* count, NOT max_nr_*syscalls: the picker samples the compact
* active_syscalls[0..nr_active) prefix maintained by
* activate_syscall_in_table()/deactivate_syscall_in_table(), and
* sampling the full max table on a restricted run (capability filter,
* -c/-r/-g, runtime deactivation) burns the retry budget on slots known
* to be zero. The load is a snapshot — a racing deactivate that lowers
* the count after we read it is absorbed by the zero-retry guard at the
* picker (deactivate swap-removes and zeros the LAST slot, so a stale
* read can see a transient 0 mid-swap).
*/
bool choose_syscall_table(struct childdata *child,
			  unsigned int *nr_syscalls_out)
{
	bool want32 = false;

	/* Consult the cached per-table validity flags in shm rather than
	 * walking the tables on every pick.  An invalid 64-bit table turns
	 * 64-bit picks off and forces this call onto the 32-bit table. */
	if (!__atomic_load_n(&shm->valid_syscall_table_64, __ATOMIC_RELAXED)) {
		use_64bit = false;
		want32 = true;
	}

	if (!__atomic_load_n(&shm->valid_syscall_table_32, __ATOMIC_RELAXED))
		use_32bit = false;

	/* With both tables usable, roll the dice: one pick in ten goes to
	 * the 32-bit table.  The dice are only rolled when both are on. */
	if ((use_64bit == true) && (use_32bit == true) && ONE_IN(10))
		want32 = true;

	/* Point the global table and the child's active list at the chosen
	 * arch, and hand back a snapshot of that arch's active count. */
	if (want32) {
		syscalls = syscalls_32bit;
		child->active_syscalls = shm->active_syscalls32;
		*nr_syscalls_out =
			__atomic_load_n(&shm->nr_active_32bit_syscalls,
					__ATOMIC_RELAXED);
		return true;
	}

	syscalls = syscalls_64bit;
	child->active_syscalls = shm->active_syscalls64;
	*nr_syscalls_out = __atomic_load_n(&shm->nr_active_64bit_syscalls,
					   __ATOMIC_RELAXED);
	return false;
}
/*
* Validation-failure resilience: a syscallnr drawn by the picker is
* deactivated only after VALIDATE_FAIL_THRESHOLD consecutive picks of
* that syscall fail validate_specific_syscall_silent(). A transient
* flap (e.g. a probe that EAGAIN'd once or briefly tripped a kernel
* gate) used to permanently kill the entry on the first failure with
* no log; now the counter has to build up and the deactivation is
* announced. The counter (shm->syscall_validation_failures[]) is
* shared across children so observation accumulates fleet-wide, and
* resets to 0 on the first successful validation for that slot.
*/
#define VALIDATE_FAIL_THRESHOLD 3
/*
 * Record a successful validation of syscallnr: clear the shared
 * consecutive-failure counter for that slot.  The counter is only
 * written when it is currently non-zero.
 */
static void note_validation_success(unsigned int syscallnr, bool do32)
{
	const unsigned int table = do32 ? 1 : 0;

	/* Already clear: nothing to write. */
	if (__atomic_load_n(&shm->syscall_validation_failures[table][syscallnr],
			    __ATOMIC_RELAXED) == 0)
		return;

	__atomic_store_n(&shm->syscall_validation_failures[table][syscallnr],
			 0, __ATOMIC_RELAXED);
}
/*
 * Record a failed validation of syscallnr.  Bumps the shared failure
 * counter for the slot; once it reaches VALIDATE_FAIL_THRESHOLD the
 * syscall is announced, its counter is reset, and it is deactivated.
 * When the run targets a restricted set of syscalls and this
 * deactivation leaves nothing enabled, a second message says so.
 */
static void note_validation_failure(unsigned int syscallnr, bool do32)
{
	const unsigned int table = do32 ? 1 : 0;
	struct syscallentry *ent;
	const char *label;
	unsigned int failures;
	bool targeted_run;

	failures = (unsigned int)__atomic_add_fetch(
			&shm->syscall_validation_failures[table][syscallnr], 1,
			__ATOMIC_RELAXED);

	/* Still below the threshold: tolerate the flap. */
	if (failures < VALIDATE_FAIL_THRESHOLD)
		return;

	ent = get_syscall_entry(syscallnr, do32);
	if (ent != NULL)
		label = ent->name;
	else
		label = "<unknown>";

	output(0, "deactivating syscall %s (nr=%u) after %u validation failures\n",
	       label, syscallnr, failures);

	/* Reset the counter before switching the syscall off. */
	__atomic_store_n(&shm->syscall_validation_failures[table][syscallnr], 0,
			 __ATOMIC_RELAXED);
	deactivate_syscall_locked(syscallnr, do32);

	/* Only query the enabled set when the run was restricted to a
	 * specific syscall, a random selection, or a group. */
	targeted_run = do_specific_syscall || random_selection ||
		       desired_group != GROUP_NONE;
	if (targeted_run && no_syscalls_enabled() == true)
		output(0, "%s was the last syscall in the targeted set; "
			  "depleted via %u validation failures\n",
		       label, VALIDATE_FAIL_THRESHOLD);
}
/*
* Check if a syscall entry belongs to the target group.
* Used by group biasing to filter candidates.
*/
static bool syscall_in_group(unsigned int nr, bool do32, unsigned int target_group)
{
struct syscallentry *entry;
entry = get_syscall_entry(nr, do32);
if (entry == NULL)
return false;
return entry->group == target_group;
}
/*
* Pick the syscall to run under STRATEGY_HEURISTIC: uniform draw from
* active_syscalls, then layered biases — group affinity (70% prefer last
* group) and kcov cold-skip (probabilistic). This is trinity's
* pre-rotation default behaviour.
*/
static bool set_syscall_nr_heuristic(struct syscallrecord *rec,
struct childdata *child)
{
struct syscallentry *entry;
unsigned int syscallnr;
int val;
bool do32;
unsigned int group_attempts = 0;
unsigned int kcov_attempts = 0;
unsigned int outer_attempts = 0;
unsigned int nr_syscalls;
/* Pick the syscall table once per call: in uniarch the do32 result
* is a constant; in biarch the do32 dice rolls once per pick. The
* nr_syscalls snapshot is the CURRENT active count
* (shm->nr_active_*) so the rnd_modulo_u32() draw indexes directly
* into the compact active_syscalls[0..nr_active) prefix and a
* restricted run never wastes the retry budget on the sparse-zero
* tail of the max table. */
if (biarch) {
do32 = choose_syscall_table(child, &nr_syscalls);
} else {
do32 = false;
nr_syscalls = __atomic_load_n(&shm->nr_active_syscalls,
__ATOMIC_RELAXED);
}
retry:
if (no_syscalls_enabled() == true) {
output(0, "[%d] No more syscalls enabled. Exiting\n", mypid());
__atomic_store_n(&shm->exit_reason, EXIT_NO_SYSCALLS_ENABLED, __ATOMIC_RELAXED);
return FAIL;
}
/* Bail if we have spent too many iterations failing to pick a
* usable syscall. Even sampling the compact active prefix, a table
* dominated by EXPENSIVE syscalls (kept at 1-in-1000) can wedge
* the child in a tight retry loop. */
if (outer_attempts++ > 10000) {
output(0, "[%d] set_syscall_nr exceeded retry budget\n", mypid());
return FAIL;
}
syscallnr = rnd_modulo_u32(nr_syscalls);
/* If we got a syscallnr which is not active repeat the attempt,
* since another child has switched that syscall off already.*/
val = child->active_syscalls[syscallnr];
if (val == 0)
goto retry;
syscallnr = val - 1;
/*
* EXPENSIVE early-out: bitmap test before validate + entry fetch,
* so the 999/1000 reject path skips the cache miss on the
* syscallentry that the EXPENSIVE block below used to require.
*/
if (syscall_is_expensive(syscallnr, do32) && !ONE_IN(1000))
goto retry;
if (validate_specific_syscall_silent(syscalls, syscallnr) == false) {
note_validation_failure(syscallnr, do32);
goto retry;
}
note_validation_success(syscallnr, do32);
entry = get_syscall_entry(syscallnr, do32);
if (entry == NULL)
goto retry;
/*
* Group biasing: when enabled and we have a previous group context,
* bias selection toward syscalls in the same group.
*
* 70% of the time: prefer same group as last call
* 25% of the time: accept any syscall (no bias)
* 5% of the time: accept any syscall (exploration)
*
* If we can't find a same-group syscall after 20 attempts,
* fall through and accept whatever we picked.
*/
if (group_bias && child->last_group != GROUP_NONE) {
unsigned int dice = rnd_modulo_u32(100);
if (dice < 70) {
/* Try to pick from same group */
if (!syscall_in_group(syscallnr, do32, child->last_group)) {
group_attempts++;
if (group_attempts < 20)
goto retry;
/* Gave up, accept this one. */
}
}
/* dice >= 70: accept any syscall */
}
/* Coverage-guided cold avoidance: if this syscall has stopped
* finding new edges, skip it with a probability that grows the
* staler it gets — a syscall stuck for one threshold-window gets
* the same 50% baseline as before, but one stuck for ten gets
* skipped 90% of the time.
*
* Suppressed inside a SR_PLATEAU_FORCE intervention when the
* random-rescue classifier has accumulated enough RRC_COLD_SKIP
* evidence to amplify that class: the rescues that have been
* carrying the fleet past the plateau are mostly cold-skipped
* syscalls, and structured replay means letting the heuristic
* actually pick them. Both gates checked because either alone
* is insufficient -- plateau_active without amplification means
* a different class won, and amplification cannot stay live
* after the plateau lifts (the orchestrator clears the field on
* its next non-intervention rotation). */
if (!plateau_rescue_bias_active_for(RRC_COLD_SKIP)) {
unsigned int skip_pct = kcov_syscall_cold_skip_pct(syscallnr);
if (skip_pct > 0 && rnd_modulo_u32(100) < skip_pct) {
kcov_attempts++;
if (kcov_attempts < 20)
goto retry;
}
}
/* --cred-throttle gate. Returns false unconditionally when the flag
* is off (single RELAXED bool load short-circuit, no per-class state
* touched) so the default picker distribution is byte-identical.
* Placed AFTER validate/EXPENSIVE/group/cold-skip so a rejected pick
* shares the existing outer_attempts budget instead of needing its
* own retry cap. */
if (cred_throttle_should_reject(syscallnr, do32))
goto retry;
/* Wall-lever SHADOW gate: the candidate has cleared every
* live correctness + bias gate above, so this is the population a
* live wall-lever variant would have to act on. Bump the eligible
* counter on every plateau-active pick (probe short-circuits to
* false outside the plateau, so the conditional is cheap) and bump
* the would_suppress family when the data-driven predicate fires.
* Live picker is byte-identical -- the lever does NOT reject here. */
if (kcov_shm != NULL &&
__atomic_load_n(&kcov_shm->plateau_active, __ATOMIC_ACQUIRE)) {
__atomic_fetch_add(&shm->stats.wall_lever_eligible_total, 1UL,
__ATOMIC_RELAXED);
if (wall_lever_should_suppress_shadow(syscallnr)) {
__atomic_fetch_add(
&shm->stats.wall_lever_would_suppress_total,
1UL, __ATOMIC_RELAXED);
__atomic_fetch_add(
&shm->stats.wall_lever_would_suppress[syscallnr],
1UL, __ATOMIC_RELAXED);
}
}
/* publish (nr, do32bit) as a coherent pair. */
srec_publish_begin(rec);
rec->do32bit = do32;
rec->nr = syscallnr;
srec_publish_end(rec);
return true;
}
/*
* Anti-prior reject-retry budget. The accept gate's per-call rejection
* rate sits at 1 - 1/MAX_BOOST = 87.5% at the median; over a sparse
* active table this still resolves in a handful of retries on average,
* but a pathological mix (e.g. every active syscall sitting at the
* over-picked saturation point, accept = 1/MAX_BOOST^2) could push past
* the natural recovery budget. Bound at 64 so the inner loop never
* burns more than the per-iteration cost is worth; falling through
* means accepting whatever the picker happened to land on, which
* degrades anti-prior gracefully to uniform pick rather than wedging
* the syscall picker. Kept well below the outer 10000 budget so the
* gate cannot starve the rest of the validate / EXPENSIVE gates.
*/
#define ANTI_PRIOR_RETRY_CAP 64U
/*
* SHADOW deep-but-warm candidate predicate tunables -- consumed by the
* post-collect bookkeeping in dispatch_step() that bumps shm->stats.
* warm_reserve_candidates*. Static defaults; no runtime knob exists
* yet because the live STAGE B reserve+replay consumer is not built.
*
* DEEP_WARM_PCS_MIN_CALLS
* Warmup floor on the running-mean clause: a syscall whose lifetime
* invocation count is below this threshold cannot trip the
* "per-call PCs >= MULT * mean" check. Keeps the first handful of
* calls on a syscall from all qualifying against their own zero or
* near-zero baseline mean. Sized to the same order of magnitude as
* the bandit / remote-adaptive sample floors elsewhere; large
* enough to filter cold-start noise, small enough that any syscall
* that gets routine traffic clears it inside a single periodic
* dump window.
* DEEP_WARM_PCS_MEAN_MULT
* High-side multiplier on the running mean for the per-call PC-
* density clause: a call's local_distinct_pcs must reach at least
* MULT * mean to qualify. 2 is the "noticeably deeper than this
* syscall's own typical trace" cutoff -- aggressive enough to
* catch the long-tail expensive calls without flagging every call
* that randomly lands above the mean. Picked over a true quartile
* mechanism so the predicate stays cheap (one integer cross-
* product, no per-syscall sorted-sample buffer) -- noted as the
* STAGE A default in the dispatchable plan.
* DEEP_WARM_TRACE_NUM / DEEP_WARM_TRACE_DEN
* Threshold on the per-call PC trace length as a fraction of the
* KCOV_TRACE_SIZE buffer cap: a call's trace_size must reach at
* least NUM/DEN of the buffer to qualify under the near-truncation
* clause. 9/10 matches the dispatchable plan's 0.9 default; the
* ratio is applied as a cross-product to avoid the runtime divide.
*/
#define DEEP_WARM_PCS_MIN_CALLS 16UL
#define DEEP_WARM_PCS_MEAN_MULT 2UL
#define DEEP_WARM_TRACE_NUM 9UL
#define DEEP_WARM_TRACE_DEN 10UL
/*
* Pick the syscall to run under STRATEGY_RANDOM: uniform draw from
* active_syscalls with no further biasing. The "shake the dust off"
* pass — useless on its own, but exposes paths the heuristic biases
* systematically suppress (cold syscalls, productive-pair-only flow).
*
* Active_syscalls + EXPENSIVE + AVOID_SYSCALL gating remain because
* those are correctness gates, not selection biases — bypassing them
* just wastes iterations on calls we know we can't make.
*
* Anti-prior plateau intervention: during an SR_PLATEAU_FORCE window
* the orchestrator may have rotated into PIM_ANTI_PRIOR mode, in which
* case the per-candidate accept gate inverts the picker's learned
* per-syscall pick-rate distribution -- syscalls the bandit has been
* over-selecting get rejected at up to MAX_BOOST^2:1, low-count
* syscalls accept at full uniform rate. Outside the intervention the
* gate's atomic load short-circuits and the picker is the historical
* pure-uniform draw.
*/
bool set_syscall_nr_random(struct syscallrecord *rec,
			   struct childdata *child)
{
	unsigned int syscallnr;
	unsigned int nr_active;
	unsigned int total_tries = 0;
	unsigned int anti_prior_rejects = 0;
	bool anti_prior_on;
	bool do32;

	/* As in set_syscall_nr_heuristic: the table is chosen once per
	 * call, and nr_active is the active-prefix count. */
	if (biarch) {
		do32 = choose_syscall_table(child, &nr_active);
	} else {
		nr_active = __atomic_load_n(&shm->nr_active_syscalls,
					    __ATOMIC_RELAXED);
		do32 = false;
	}

	/* Sample the anti-prior mode once so every retry of this pick
	 * sees the same answer. */
	anti_prior_on = plateau_anti_prior_active();

	for (;;) {
		int slot;

		if (no_syscalls_enabled() == true) {
			output(0, "[%d] No more syscalls enabled. Exiting\n", mypid());
			__atomic_store_n(&shm->exit_reason, EXIT_NO_SYSCALLS_ENABLED, __ATOMIC_RELAXED);
			return FAIL;
		}

		/* Overall retry budget shared by every gate below. */
		if (total_tries++ > 10000) {
			output(0, "[%d] set_syscall_nr_random exceeded retry budget\n", mypid());
			return FAIL;
		}

		/* A zero slot is a deactivated position; draw again. */
		slot = child->active_syscalls[rnd_modulo_u32(nr_active)];
		if (slot == 0)
			continue;
		syscallnr = slot - 1;

		/* EXPENSIVE syscalls pass one time in a thousand; tested
		 * before validation. */
		if (syscall_is_expensive(syscallnr, do32) && !ONE_IN(1000))
			continue;

		if (validate_specific_syscall_silent(syscalls, syscallnr) == false) {
			note_validation_failure(syscallnr, do32);
			continue;
		}
		note_validation_success(syscallnr, do32);

		/* Anti-prior accept gate, applied only to candidates that
		 * already cleared the gates above.  Once
		 * ANTI_PRIOR_RETRY_CAP rejections have accumulated, the
		 * current candidate is accepted regardless. */
		if (anti_prior_on && !plateau_anti_prior_accept(syscallnr) &&
		    ++anti_prior_rejects < ANTI_PRIOR_RETRY_CAP)
			continue;

		/* --cred-throttle gate; rejections draw on total_tries. */
		if (cred_throttle_should_reject(syscallnr, do32))
			continue;

		break;
	}

	/* Wall-lever SHADOW accounting, same tally as the heuristic
	 * picker feeds: while the plateau flag is set, count the pick as
	 * eligible and, when the shadow predicate fires, as
	 * would-be-suppressed.  Stats only -- nothing is rejected here. */
	if (kcov_shm != NULL &&
	    __atomic_load_n(&kcov_shm->plateau_active, __ATOMIC_ACQUIRE)) {
		__atomic_fetch_add(&shm->stats.wall_lever_eligible_total, 1UL,
				   __ATOMIC_RELAXED);
		if (wall_lever_should_suppress_shadow(syscallnr)) {
			__atomic_fetch_add(&shm->stats.wall_lever_would_suppress_total,
					   1UL, __ATOMIC_RELAXED);
			__atomic_fetch_add(&shm->stats.wall_lever_would_suppress[syscallnr],
					   1UL, __ATOMIC_RELAXED);
		}
	}

	/* Publish (nr, do32bit) together between the begin/end markers. */
	srec_publish_begin(rec);
	rec->do32bit = do32;
	rec->nr = syscallnr;
	srec_publish_end(rec);

	return true;
}
/*
* Cold-syscall weight for the frontier picker's plateau fallback path.
* Returns a value in [0, FRONTIER_COLD_SCALE] that the accept gate in
* set_syscall_nr_coverage_frontier() consumes as the bias toward this
* syscall when the frontier ring has gone silent. Higher = more biased
* toward picking this syscall.
*
* Three regimes, deliberately distinguished:
*
* calls == 0 (never invoked)
* -- return FRONTIER_COLD_SCALE. Maximum bias. These are
* genuinely under-explored slots the picker should be steering
* to.
*
* calls > 0 && edges == 0 (invoked, never productive)
* -- return 0. Minimum bias. The syscall has had its shot and
* failed to produce any new coverage; biasing toward it pulls
* the picker into a bug-graveyard where it spends the plateau
* re-running calls that already established themselves as
* unproductive. The +1 smoothing on w in the caller's accept
* gate keeps these syscalls reachable at the uniform floor
* ((0+1)/(SCALE+1)) rather than starving entirely.
*
* calls > 0 && edges > 0 (invoked, some productivity)
* -- return SCALE - floor(SCALE * edges / calls). Linear inverse
* productivity, same shape as before but at the new SCALE
* resolution: a perfectly productive syscall (edges == calls)
* lands at 0, a syscall that has produced a small fraction of
* new edges across many calls keeps a near-full weight.
* edges <= calls by construction so the subtraction can't
* underflow.
*
* The previous shape conflated the first two regimes: both never-tried
* and tried-but-broken returned SCALE, so the plateau-fallback picker
* weighted the bug-graveyard identically to the genuinely under-explored
* frontier and burned its picks re-running known dead-ends. Splitting
* the two regimes is the headline fix; the SCALE bump (16 -> 256, see
* the FRONTIER_COLD_SCALE macro comment) is what lets the third regime's
* integer divide actually distinguish productivity below ~6% from MAX
* instead of flooring everything in that range to the ceiling.
*
* Semantics note: per_syscall_edges has "bumps by 1 per call that
* discovered >=1 new edge" semantics (see include/kcov.h), not raw
* bucket-edge counts, so edges <= calls by construction. Reads are
* RELAXED -- a stale snapshot is harmless; a racing kcov_collect bump
* that lands mid-pick only shifts the weight by one step, well inside
* the slack the outer accept/retry loop already tolerates.
*
* Returns FRONTIER_COLD_SCALE (the maximum weight, identical for every
* syscall) when kcov_shm is unavailable or nr is outside the per-syscall
* tables (nr >= MAX_NR_SYSCALL), so the caller's accept gate degrades to
* a plain uniform pick rather than wedging on a NULL deref or indexing
* out of bounds -- matches the kcov-less fallback the rest of the
* codebase already takes (see kcov_syscall_cold_skip_pct in kcov.c for
* the sibling pattern).
*/
static unsigned long frontier_cold_weight(unsigned int nr,
					  struct childdata *child)
{
	unsigned long edges, calls;
	unsigned long bucket_bits, distinct_pcs;
	unsigned long transition_edges_real_local;
	unsigned long old_weight, blend_weight;
	unsigned long blend_productivity;
	enum kcov_transition_reward_mode trew_mode;

	/* kcov_shm missing or nr out of range: return the uniform floor
	 * without touching any per-syscall array or A/B counter. */
	if (kcov_shm == NULL || nr >= MAX_NR_SYSCALL)
		return FRONTIER_COLD_SCALE;

	calls = __atomic_load_n(&kcov_shm->per_syscall_calls[nr],
				__ATOMIC_RELAXED);

	/* Never invoked: MAX bias, genuinely under-explored.  Returning
	 * here also skips the A/B math and counters below, and keeps the
	 * divisions further down from ever seeing calls == 0. */
	if (calls == 0)
		return FRONTIER_COLD_SCALE;

	edges = __atomic_load_n(&kcov_shm->per_syscall_edges[nr],
				__ATOMIC_RELAXED);

	/* OLD weight (call-count only).  Always computed so the blend
	 * below can be compared against it; it is the value returned for
	 * Arm A children and for child == NULL.
	 *
	 *   edges == 0     -- invoked but never productive (bug-graveyard):
	 *                     weight 0.
	 *   edges >= calls -- fully productive, or a RELAXED-load inversion
	 *                     of the edges <= calls steady state: weight 0,
	 *                     which also keeps the unsigned subtract from
	 *                     underflowing.
	 *   otherwise      -- linear inverse productivity.
	 *
	 * The caller's (w+1)/(SCALE+1) accept floor keeps a w == 0 syscall
	 * reachable. */
	if (edges == 0 || edges >= calls)
		old_weight = 0;
	else
		old_weight = FRONTIER_COLD_SCALE -
			     (edges * FRONTIER_COLD_SCALE) / calls;

	/* BLENDED weight: per_syscall_edges (count of productive calls)
	 * stays the backbone and three yield signals add logarithmic
	 * credit on top of it:
	 *
	 *   bucket_bits_real (1x)
	 *	a known edge moved into a never-seen hit-count bucket
	 *	("new behaviour on known code").
	 *   distinct_pcs (2x)
	 *	first sighting of a PC the global bitmap had not seen;
	 *	weighted higher as the less ambiguous signal.
	 *   transition_edges_real_local (1x)
	 *	a new (prev_canon_pc, cur_canon_pc) transition slot from
	 *	local-mode traces; like bucket_bits it can fire on
	 *	already-known code, hence 1x.
	 *
	 * The diag counters are split by [nr][do32]; both arch slots are
	 * summed so the numerator pairs with the unsplit per_syscall_calls
	 * denominator.  The transition counter has no arch split, so one
	 * load suffices.
	 *
	 * ilog2 compresses each term so one call with a huge trace cannot
	 * dominate the score; the +1 keeps the argument non-zero.
	 *
	 * The transition term is zeroed when kcov_transition_reward_mode
	 * is OFF so the blend then depends only on edges, bucket_bits and
	 * distinct_pcs.  In every other mode it is folded in.
	 *
	 * NOTE(review): this function does not consult trew_mode when
	 * choosing which weight to return.  Whether SHADOW_ONLY leaves
	 * live selection unchanged depends on how frontier_blend_arm_b is
	 * stamped on the child, which is not visible here -- confirm. */
	trew_mode = __atomic_load_n(&kcov_transition_reward_mode,
				    __ATOMIC_RELAXED);

	bucket_bits =
		__atomic_load_n(
			&kcov_shm->per_syscall_diag[nr][0].bucket_bits_real,
			__ATOMIC_RELAXED) +
		__atomic_load_n(
			&kcov_shm->per_syscall_diag[nr][1].bucket_bits_real,
			__ATOMIC_RELAXED);

	distinct_pcs =
		__atomic_load_n(
			&kcov_shm->per_syscall_diag[nr][0].distinct_pcs,
			__ATOMIC_RELAXED) +
		__atomic_load_n(
			&kcov_shm->per_syscall_diag[nr][1].distinct_pcs,
			__ATOMIC_RELAXED);

	transition_edges_real_local =
		(trew_mode == KCOV_TRANSITION_REWARD_OFF) ? 0UL :
		__atomic_load_n(
			&kcov_shm->per_syscall_transition_edges_real_local[nr],
			__ATOMIC_RELAXED);

	blend_productivity = edges +
		(unsigned long)ilog2_ul(bucket_bits + 1UL) +
		2UL * (unsigned long)ilog2_ul(distinct_pcs + 1UL) +
		(unsigned long)ilog2_ul(transition_edges_real_local + 1UL);

	/* Same three regimes as the OLD arm, keyed on the blended score.
	 *
	 * The == 0 test is the bug-graveyard regime: a syscall that has
	 * been invoked and shows no yield on ANY signal.  Without it the
	 * linear formula would hand such a syscall the full
	 * FRONTIER_COLD_SCALE, i.e. weight it like a never-tried syscall
	 * for Arm B children while Arm A children weight it 0, and every
	 * graveyard sample would be booked as frontier_blend_new_higher.
	 * (A syscall with edges == 0 but a non-zero yield term, e.g. a
	 * transition discovery, still gets a non-zero weight.)
	 *
	 * The >= calls test keeps the unsigned subtract from underflowing. */
	if (blend_productivity == 0 || blend_productivity >= calls)
		blend_weight = 0;
	else
		blend_weight = FRONTIER_COLD_SCALE -
			       (blend_productivity * FRONTIER_COLD_SCALE) /
			       calls;

	/* A/B counters.  Bumped once per invocation that reaches this
	 * point, whichever weight ends up being returned, so the sums
	 * describe how often and in which direction the BLENDED score
	 * differs from the OLD one.  Exactly one of new_lower /
	 * new_higher / new_equal is bumped per sample. */
	__atomic_fetch_add(&shm->stats.frontier_blend_samples, 1UL,
			   __ATOMIC_RELAXED);
	__atomic_fetch_add(&shm->stats.frontier_blend_old_weight_sum,
			   old_weight, __ATOMIC_RELAXED);
	__atomic_fetch_add(&shm->stats.frontier_blend_new_weight_sum,
			   blend_weight, __ATOMIC_RELAXED);
	if (blend_weight < old_weight)
		__atomic_fetch_add(&shm->stats.frontier_blend_new_lower,
				   1UL, __ATOMIC_RELAXED);
	else if (blend_weight > old_weight)
		__atomic_fetch_add(&shm->stats.frontier_blend_new_higher,
				   1UL, __ATOMIC_RELAXED);
	else
		__atomic_fetch_add(&shm->stats.frontier_blend_new_equal,
				   1UL, __ATOMIC_RELAXED);

	/* Per-child A/B arm: children flagged frontier_blend_arm_b get
	 * the BLENDED weight live; everyone else, including a NULL child
	 * (parent context), gets the OLD weight. */
	if (child != NULL && child->frontier_blend_arm_b)
		return blend_weight;
	return old_weight;
}
/*
* Adaptive remote-KCOV mode disposition for the upcoming dispatch.
* Reads the per-syscall mode-keyed yield counters bumped in
* kcov_collect() (remote_pc_calls / remote_pc_edge_calls /
* local_pc_calls / local_pc_edge_calls in struct kcov_shared) and
* returns the adaptive remote_mode the upcoming call should run with;
* the caller threads that through the per-child Arm A/B gate so Arm A
* stays byte-identical to the static policy.
*
* Three dispositions can fire, mutually exclusive on the
* (entry->flags & KCOV_REMOTE_HEAVY) axis (DEMOTE on the HEAVY path,
* PROMOTE and FORCE on the non-HEAVY path; PROMOTE pre-empts FORCE):
*
* DEMOTE fires only on HEAVY-flagged syscalls whose static decision
* was remote_mode==true and whose lifetime remote_pc_calls
* has crossed REMOTE_ADAPTIVE_MIN_REMOTE_CALLS without ever
* producing a single remote_pc_edge_calls bump. The HEAVY
* rate (1-in-2) is wasted on that syscall in this kernel
* and the adaptive policy flips remote_mode to false so the
* call lands on the local PC fd instead.
*
* PROMOTE fires only on unflagged syscalls whose static decision was
* remote_mode==false, whose lifetime remote AND local samples
* have BOTH crossed their MIN_*_CALLS sample floors, whose
* remote sample produced at least one edge, AND whose remote
* edge rate beats the local edge rate by the configured
* REMOTE_ADAPTIVE_PROMOTE_MARGIN_NUM/PROMOTE_MARGIN_DEN
* relative margin. The comparison is performed via cross-
* multiplication so neither rate has to be divided -- the
* naive form
*
* remote_edge_calls / remote_pc_calls
* > (MARGIN_NUM / MARGIN_DEN) * local_edge_calls / local_pc_calls
*
* is replaced by
*
* remote_edge_calls * local_pc_calls * MARGIN_DEN
* > local_edge_calls * remote_pc_calls * MARGIN_NUM
*
* which is equivalent in the positive denominators the
* MIN_*_CALLS gates guarantee and never divides. Both
* products are checked with __builtin_mul_overflow; on
* overflow the promote disposition is suppressed (treated as
* agree with static) so a long run with very large counters
* cannot silently wrap into a false promote.
*
* FORCE fires only when the parent-published plateau hypothesis
* is PLATEAU_HYPOTHESIS_REMOTE_DOMINANT AND the unflagged-
* path PROMOTE disposition did NOT already fire on this
* call AND the syscall's lifetime remote sample has crossed
* the looser REMOTE_ADAPTIVE_PLATEAU_FORCE_MIN_REMOTE_CALLS
* floor AND its lifetime remote_pc_edge_calls is at least
* REMOTE_ADAPTIVE_PLATEAU_FORCE_MIN_EDGES (ever yielded).
* Widens promote during the plateau emergency: a remote-
* dominant plateau is direct evidence the fleet is making
* forward progress via remote sampling, so a proven remote
* yielder is worth keeping in the remote pool even before
* its rate has cleared the PROMOTE_MARGIN bar. The HEAVY
* DEMOTE branch is intentionally NOT widened (see the
* constant block in include/kcov.h for the rationale).
*
* SHADOW: bump one of remote_adaptive_{would_demote, would_promote,
* would_force, agree} per call and bump remote_adaptive_samples once.
* The bumps happen unconditionally on the helper entry path so both
* A/B arms contribute to the same denominator and the would-be
* divergence stays observable on Arm A (the control cohort) too.
*
* Returns the static decision verbatim, before any counter is bumped,
* when kcov_shm is unavailable, nr is out of range, or entry is NULL
* -- matches the kcov-less fallback the rest of the file already takes
* (see frontier_cold_weight above for the sibling pattern).
*/
static bool remote_adaptive_decide(unsigned int nr,
struct syscallentry *entry,
bool static_remote)
{
unsigned long rcalls, redgec, lcalls, ledgec;
bool would_demote = false, would_promote = false, would_force = false;
bool would_gate_promote = false;
bool adaptive_remote = static_remote;
if (kcov_shm == NULL || nr >= MAX_NR_SYSCALL || entry == NULL)
return static_remote;
rcalls = __atomic_load_n(&kcov_shm->remote_pc_calls[nr],
__ATOMIC_RELAXED);
redgec = __atomic_load_n(&kcov_shm->remote_pc_edge_calls[nr],
__ATOMIC_RELAXED);
lcalls = __atomic_load_n(&kcov_shm->local_pc_calls[nr],
__ATOMIC_RELAXED);
ledgec = __atomic_load_n(&kcov_shm->local_pc_edge_calls[nr],
__ATOMIC_RELAXED);
if ((entry->flags & KCOV_REMOTE_HEAVY) && static_remote) {
/* Demote: HEAVY syscall, static says remote, but the
* lifetime evidence is that remote sampling on this
* syscall has produced zero edges across enough samples
* to be confident. Flip to local. */
if (rcalls >= REMOTE_ADAPTIVE_MIN_REMOTE_CALLS &&
redgec == 0) {
adaptive_remote = false;
would_demote = true;
}
} else if (!(entry->flags & KCOV_REMOTE_HEAVY) && !static_remote) {
/* Promote: not HEAVY, static says local, but the
* lifetime evidence is that remote sampling on this
* syscall has out-yielded local by the configured
* margin. Both sample-size floors must be met and the
* remote sample must have produced at least one edge --
* otherwise the numerator is zero and the rate
* comparison is uninformative. */
if (rcalls >= REMOTE_ADAPTIVE_MIN_REMOTE_CALLS &&
lcalls >= REMOTE_ADAPTIVE_MIN_LOCAL_CALLS &&
redgec > 0) {
unsigned long lhs, rhs = 0;
bool ok;
ok = !__builtin_mul_overflow(redgec, lcalls, &lhs);
if (ok)
ok = !__builtin_mul_overflow(
lhs,
REMOTE_ADAPTIVE_PROMOTE_MARGIN_DEN,
&lhs);
if (ok)
ok = !__builtin_mul_overflow(
ledgec, rcalls, &rhs);
if (ok)
ok = !__builtin_mul_overflow(
rhs,
REMOTE_ADAPTIVE_PROMOTE_MARGIN_NUM,
&rhs);
if (ok && lhs > rhs) {
adaptive_remote = true;
would_promote = true;
/* Shadow plateau-gate evaluation: the
* proposed live gate would suppress this
* promote unless the current plateau
* hypothesis is REMOTE_DOMINANT. Sample
* the parent-published hypothesis via
* shm (same read pattern as the
* CMP_RISING_PC_FLAT consumer in
* dispatch_step's REDQUEEN gate -- the
* strategy.c-internal static is parent-
* private and stays stale across the
* fork boundary). Live disposition is
* not touched; the counter only records
* how often the gate would diverge from
* the current always-promote behaviour
* once it is flipped on by default. */
if (__atomic_load_n(
&shm->plateau_current_hypothesis,
__ATOMIC_RELAXED) !=
PLATEAU_HYPOTHESIS_REMOTE_DOMINANT)
would_gate_promote = true;
}
}
/* Plateau-aware widening of the promote branch. Only
* runs when the regular promote check did NOT already
* fire on this call (would_promote == false) so the
* disposition counters stay mutually exclusive; the
* mid-call sample of the parent-published plateau
* hypothesis matches the shadow-gate read above so the
* two predicates see the same hypothesis value. Sample
* floors are deliberately looser than the regular
* promote rule's: see the constant block in
* include/kcov.h for the per-floor justification. */
if (!would_promote &&
rcalls >= REMOTE_ADAPTIVE_PLATEAU_FORCE_MIN_REMOTE_CALLS &&
redgec >= REMOTE_ADAPTIVE_PLATEAU_FORCE_MIN_EDGES &&
__atomic_load_n(&shm->plateau_current_hypothesis,
__ATOMIC_RELAXED) ==
PLATEAU_HYPOTHESIS_REMOTE_DOMINANT) {
adaptive_remote = true;
would_force = true;
}
}
__atomic_fetch_add(&shm->stats.remote_adaptive_samples, 1UL,
__ATOMIC_RELAXED);
if (would_demote)
__atomic_fetch_add(&shm->stats.remote_adaptive_would_demote,
1UL, __ATOMIC_RELAXED);
else if (would_promote)
__atomic_fetch_add(&shm->stats.remote_adaptive_would_promote,
1UL, __ATOMIC_RELAXED);
else if (would_force)
__atomic_fetch_add(&shm->stats.remote_adaptive_would_force,