Skip to content

Commit

Permalink
BPF optimizer: Optimize redundant loads.
Browse files Browse the repository at this point in the history
This is useful in conjunction cases when a single argument is compared
against multiple values, e.g. for `fcntl(2)` where `args[1]` may be `F_GETFL`,
`F_SETFL`, or `F_GETFD`. There is no need to reload the value of `args[1]`
into the register if it is already the same as the one that was loaded from
the previous check.

This helps with syscall rules where one specific argument may be one of many
specific values, especially `ioctl` rules (critical for KVM and `nvproxy`
performance), and the benchmarks seem to agree:

```
# Time to evaluate seccomp filter for a syscall:
# (Many syscalls for which the rules are not "this argument must be one of
# these values" show no change and are omitted from this list)
                                      │    before    │                   after                   │
                                      │    sec/op    │     sec/op      vs base                   │
SentrySystrap/Postgres/futex            62.39n ± 13%    53.06n ±  39%        ~ (p=0.184 n=19+20)
SentrySystrap/Postgres/nanosleep        237.7n ± 33%    119.3n ± 142%        ~ (p=0.089 n=12)
[...]
SentryKVM/Postgres/ioctl                60.88n ± 17%    56.63n ±  12%   -6.98% (p=0.026 n=20+19)
SentryKVM/Postgres/rt_sigreturn         34.67n ± 32%    29.33n ±  49%        ~ (p=0.443 n=20+18)
[...]
NVProxyIoctl/nvproxy/ioctl_3222292175   58.81n ±  8%    54.85n ±   8%   -6.73% (p=0.015 n=29+30)
NVProxyIoctl/nvproxy/ioctl_3221767894   58.22n ±  5%    54.06n ±   6%   -7.14% (p=0.022 n=29+30)
NVProxyIoctl/nvproxy/ioctl_3222292009   58.33n ±  8%    55.67n ±   3%        ~ (p=0.070 n=28+30)
NVProxyIoctl/nvproxy/ioctl_3223340586   61.57n ±  7%    58.56n ±   7%        ~ (p=0.059 n=27+29)
NVProxyIoctl/nvproxy/ioctl_3223340587   63.10n ±  9%    58.78n ±   7%        ~ (p=0.140 n=29+30)
NVProxyIoctl/nvproxy/ioctl_3223864875   63.66n ±  7%    58.25n ±   3%   -8.50% (p=0.030 n=29)
NVProxyIoctl/nvproxy/ioctl_3224389163   64.12n ±  5%    60.96n ±   4%        ~ (p=0.114 n=29+30)
NVProxyIoctl/nvproxy/ioctl_3223078452   63.90n ±  9%    58.25n ±   6%   -8.84% (p=0.014 n=29)
NVProxyIoctl/nvproxy/ioctl_3222816309   66.07n ± 10%    58.08n ±   5%  -12.10% (p=0.003 n=29+30)
NVProxyIoctl/nvproxy/ioctl_3233302090   65.55n ±  7%    58.88n ±   8%  -10.17% (p=0.000 n=29+30)
NVProxyIoctl/nvproxy/ioctl_3224913486   65.62n ±  6%    61.10n ±   7%        ~ (p=0.072 n=28+29)
NVProxyIoctl/nvproxy/ioctl_3223340623   63.45n ±  7%    59.47n ±   6%   -6.26% (p=0.018 n=29+30)
NVProxyIoctl/nvproxy/ioctl_3223864926   66.35n ±  8%    59.45n ±   5%  -10.41% (p=0.000 n=29+30)
NVProxyIoctl/nvproxy/ioctl_805306369    62.98n ±  6%    56.26n ±   7%  -10.66% (p=0.006 n=29+30)
NVProxyIoctl/nvproxy/ioctl_75           69.09n ±  7%    64.87n ±   8%        ~ (p=0.083 n=29+28)
NVProxyIoctl/nvproxy/ioctl_805306370    72.30n ±  8%    57.81n ±   7%  -20.04% (p=0.000 n=29+30)
NVProxyIoctl/nvproxy/ioctl_23           79.59n ±  5%    73.65n ±   4%   -7.46% (p=0.001 n=29)
NVProxyIoctl/nvproxy/ioctl_24           79.70n ±  4%    73.86n ±   3%   -7.33% (p=0.014 n=29+30)
NVProxyIoctl/nvproxy/ioctl_25           79.32n ±  4%    77.69n ±   6%        ~ (p=0.468 n=29)
NVProxyIoctl/nvproxy/ioctl_26           80.15n ±  5%    76.61n ±   6%        ~ (p=0.140 n=29+30)
NVProxyIoctl/nvproxy/ioctl_27           80.02n ±  5%    75.77n ±   7%   -5.31% (p=0.003 n=29+30)
NVProxyIoctl/nvproxy/ioctl_28           80.52n ±  6%    76.79n ±   3%   -4.63% (p=0.028 n=29+30)
NVProxyIoctl/nvproxy/ioctl_33           85.05n ±  8%    78.23n ±   5%   -8.02% (p=0.028 n=29+30)
NVProxyIoctl/nvproxy/ioctl_34           83.57n ±  7%    78.74n ±   4%   -5.77% (p=0.035 n=29+30)
NVProxyIoctl/nvproxy/ioctl_37           82.20n ±  6%    79.85n ±   7%        ~ (p=0.474 n=29+30)
NVProxyIoctl/nvproxy/ioctl_38           85.38n ±  6%    78.23n ±   6%   -8.38% (p=0.001 n=29+30)
NVProxyIoctl/nvproxy/ioctl_39           85.53n ±  4%    79.87n ±   6%   -6.62% (p=0.007 n=29+30)
NVProxyIoctl/nvproxy/ioctl_45           86.29n ±  4%    83.78n ±   8%        ~ (p=0.176 n=29+30)
NVProxyIoctl/nvproxy/ioctl_65           86.89n ±  4%    82.06n ±   5%   -5.56% (p=0.028 n=29+30)
NVProxyIoctl/nvproxy/ioctl_68           88.80n ±  6%    80.78n ±   7%   -9.03% (p=0.000 n=29+30)
NVProxyIoctl/nvproxy/ioctl_72           90.13n ±  3%    82.77n ±   6%   -8.17% (p=0.005 n=29+30)
NVProxyIoctl/nvproxy/ioctl_73           87.23n ±  6%    81.86n ±   4%   -6.16% (p=0.039 n=29+30)
NVProxyIoctl/nvproxy/ioctl_21506        55.90n ±  7%    56.20n ±   6%        ~ (p=0.728 n=28+30)
NVProxyIoctl/nvproxy/ioctl_3224913447   57.26n ±  9%    55.15n ±   7%   -3.68% (p=0.028 n=27+30)
NVProxyIoctl/nvproxy-48                 69.44n ±  7%    64.64n ±   6%   -6.91% (p=0.002 n=10)
geomean                                 61.27n          56.73n          -7.41%

# Number of BPF instructions before optimizations:
                          │   before    │                after                 │
                          │  gen-instr  │  gen-instr   vs base                 │
SentrySystrap/Postgres-48   1.487k ± 0%   1.487k ± 0%       ~ (p=1.000 n=10) ¹
SentryKVM/Postgres-48       1.345k ± 0%   1.345k ± 0%       ~ (p=1.000 n=10) ¹
NVProxyIoctl/nvproxy-48     1.931k ± 0%   1.931k ± 0%       ~ (p=1.000 n=10) ¹
geomean                     1.569k        1.569k       +0.00%
¹ all samples are equal

# Number of BPF instructions after optimizations:
                          │   before   │               after                │
                          │ opt-instr  │ opt-instr   vs base                │
SentrySystrap/Postgres-48   382.0 ± 0%   355.0 ± 0%   -7.07% (p=0.000 n=10)
SentryKVM/Postgres-48       322.0 ± 0%   297.0 ± 0%   -7.76% (p=0.000 n=10)
NVProxyIoctl/nvproxy-48     473.0 ± 0%   409.0 ± 0%  -13.53% (p=0.000 n=10)
geomean                     387.5        350.7        -9.50%

# Ratio of post-optimizer #instructions vs pre-optimizer #instructions:
                          │      before       │                   after                   │
                          │ compression-ratio │ compression-ratio  vs base                │
SentrySystrap/Postgres-48          3.893 ± 0%          4.189 ± 0%   +7.60% (p=0.000 n=10)
SentryKVM/Postgres-48              4.177 ± 0%          4.529 ± 0%   +8.43% (p=0.000 n=10)
NVProxyIoctl/nvproxy-48            4.082 ± 0%          4.721 ± 0%  +15.65% (p=0.000 n=10)
geomean                            4.049               4.474       +10.50%

# Time to run optimizer:
                          │   before    │                after                │
                          │   opt-sec   │   opt-sec    vs base                │
SentrySystrap/Postgres-48   4.699m ± 3%   5.327m ± 4%  +13.34% (p=0.000 n=10)
SentryKVM/Postgres-48       4.130m ± 3%   4.651m ± 3%  +12.60% (p=0.000 n=10)
NVProxyIoctl/nvproxy-48     10.19m ± 3%   10.42m ± 1%   +2.26% (p=0.023 n=10)
geomean                     5.826m        6.367m        +9.28%
```

Note that this only applies in practice because we extract each 32-bit "half"
of each syscall argument into its own matcher rule. So the conjunction that
this optimizes for isn't exactly over a single argument, but over a single
32-bit half of an argument.

PiperOrigin-RevId: 595563115
  • Loading branch information
EtiennePerot authored and gvisor-bot committed Jan 4, 2024
1 parent 0bb4cfd commit c7e3ea3
Show file tree
Hide file tree
Showing 2 changed files with 116 additions and 0 deletions.
72 changes: 72 additions & 0 deletions pkg/bpf/optimizer.go
Original file line number Diff line number Diff line change
Expand Up @@ -273,6 +273,77 @@ func optimizeJumpsToReturn(insns []Instruction) ([]Instruction, bool) {
return insns, changed
}

// removeRedundantLoads removes some redundant load instructions
// when the value in register A is already the same value as what is
// being loaded.
func removeRedundantLoads(insns []Instruction) ([]Instruction, bool) {
// reverseWalk maps instruction indexes I to the set of instruction indexes
// that, after their execution, may result in the control flow jumping to I.
reverseWalk := make([]map[int]struct{}, len(insns))
for pc := range insns {
reverseWalk[pc] = make(map[int]struct{})
}
for pc, ins := range insns {
if ins.IsReturn() {
continue // Return instructions are terminal.
}
if ins.IsJump() {
for _, offset := range ins.JumpOffsets() {
reverseWalk[pc+int(offset.Offset)+1][pc] = struct{}{}
}
continue
}
// All other instructions flow through.
reverseWalk[pc+1][pc] = struct{}{}
}

// Now look for redundant load instructions.
// We iterate backwards here so that we can remove instructions as we go
// without a past iteration interfering with the results of the current
// iteration. This is relying on the property that BPF programs may only
// flow forwards (no backwards jumps).
changed := false
for pc := len(insns) - 1; pc >= 0; pc-- {
ins := insns[pc]
if ins.OpCode&instructionClassMask != Ld {
continue
}
// Walk backwards until either we've reached the beginning of the program,
// or we've reached an operation which modifies register A.
lastModifiedA := -1
beforePCs := reverseWalk[pc]
walk:
for {
switch len(beforePCs) {
case 0:
// We've reached the beginning of the program without modifying A.
break walk
case 1:
var beforePC int
for bpc := range beforePCs { // Note: we know that this map only has one element.
beforePC = bpc
}
if !insns[beforePC].ModifiesRegisterA() {
beforePCs = reverseWalk[beforePC]
continue walk
}
lastModifiedA = beforePC
break walk
default:
// Multiple ways to get to `pc`.
// For simplicity, we only support the single-branch case right now.
break walk
}
}
if lastModifiedA != -1 && insns[pc].Equal(insns[lastModifiedA]) {
insns = append(insns[:pc], insns[pc+1:]...)
decrementJumps(insns, pc)
changed = true
}
}
return insns, changed
}

// jumpRewriteOperation rewrites a jump target.
type jumpRewriteOperation struct {
pc int // Rewrite instruction at this offset.
Expand Down Expand Up @@ -587,6 +658,7 @@ func Optimize(insns []Instruction) []Instruction {
optimizeJumpsToReturn,
removeZeroInstructionJumps,
removeDeadCode,
removeRedundantLoads,
optimizeJumpsToSmallestSetOfReturns,
})
}
44 changes: 44 additions & 0 deletions pkg/bpf/optimizer_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -141,6 +141,50 @@ func TestOptimize(t *testing.T) {
Stmt(Ret|K, 0),
},
},
{
name: "redundant loads removed",
optimizers: []optimizerFunc{
removeRedundantLoads,
},
insns: []Instruction{
Stmt(Ld|Imm|W, 42), // Not removed.
Jump(Jmp|Jeq|K, 42, 0, 0),
Jump(Jmp|Jgt|K, 42, 1, 0),
Stmt(Ld|Imm|W, 42), // Removed as it is equal in all cases.
Jump(Jmp|Jeq|K, 42, 1, 0),
Stmt(Ld|Imm|W, 43),
Stmt(Ld|Imm|W, 43), // Not removed as the jump above may skip over previous instruction.
Stmt(Ld|Imm|W, 43), // Removed.
Stmt(Ld|Imm|W, 44), // Not removed.
Stmt(Ld|Imm|W, 44), // Removed.
Stmt(Ld|Abs|W, 0), // Not removed.
Stmt(Ld|Abs|W, 0), // Removed.
Stmt(Ld|Abs|W, 0), // Removed.
Stmt(Ld|Abs|W, 4), // Not removed.
Stmt(Ld|Abs|W, 0), // Not removed.
Stmt(Ld|Abs|W, 11), // Not removed.
Jump(Jmp|Jeq|K, 42, 1, 0), // "True" branch will be culled.
Stmt(Ld|Abs|W, 11), // Removed as there is only one way to get here and it already loads 11.
Stmt(Ld|Abs|W, 11), // There are two branches to get here but the first gets culled, so it is still removed.
Stmt(Ld|Abs|W, 11), // Removed.
Stmt(Ret|K, 0),
},
want: []Instruction{
Stmt(Ld|Imm|W, 42),
Jump(Jmp|Jeq|K, 42, 0, 0),
Jump(Jmp|Jgt|K, 42, 0, 0),
Jump(Jmp|Jeq|K, 42, 1, 0),
Stmt(Ld|Imm|W, 43),
Stmt(Ld|Imm|W, 43),
Stmt(Ld|Imm|W, 44),
Stmt(Ld|Abs|W, 0),
Stmt(Ld|Abs|W, 4),
Stmt(Ld|Abs|W, 0),
Stmt(Ld|Abs|W, 11),
Jump(Jmp|Jeq|K, 42, 0, 0),
Stmt(Ret|K, 0),
},
},
{
name: "jumps to return",
optimizers: []optimizerFunc{
Expand Down

0 comments on commit c7e3ea3

Please sign in to comment.