Skip to content

Commit bbe4990

Browse files
add container-monitor example
1 parent 600993f commit bbe4990

File tree

7 files changed

+774
-515
lines changed

7 files changed

+774
-515
lines changed
Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,49 @@
1+
# Container Monitor TUI
2+
3+
A beautiful terminal-based container monitoring tool that combines syscall tracking, file I/O monitoring, and network traffic analysis using eBPF.
4+
5+
## Features
6+
7+
- 🎯 **Interactive Cgroup Selection** - Navigate and select cgroups with arrow keys
8+
- 📊 **Real-time Monitoring** - Live graphs and statistics
9+
- 🔥 **Syscall Tracking** - Total syscall count per cgroup
10+
- 💾 **File I/O Monitoring** - Read/write operations and bytes with graphs
11+
- 🌐 **Network Traffic** - RX/TX packets and bytes with live graphs
12+
- ⚡ **Efficient Caching** - Reduced /proc lookups for better performance
13+
- 🎨 **Beautiful TUI** - Clean, colorful terminal interface
14+
15+
## Requirements
16+
17+
- Python 3.7+
18+
- pythonbpf
19+
- Root privileges (for eBPF)
20+
21+
## Installation
22+
23+
```bash
24+
# Ensure you have pythonbpf installed
25+
pip install pythonbpf
26+
27+
# Run the monitor
28+
sudo $(which python) container_monitor.py
29+
```
30+
31+
## Usage
32+
33+
1. **Selection Screen**: Use ↑↓ arrow keys to navigate through cgroups, press ENTER to select
34+
2. **Monitoring Screen**: View real-time graphs and statistics, press ESC or 'b' to go back
35+
3. **Exit**: Press 'q' at any time to quit
36+
37+
## Architecture
38+
39+
- `container_monitor.py` - Main BPF program combining all three tracers
40+
- `data_collector.py` - Data collection, caching, and history management
41+
- `tui.py` - Terminal user interface with selection and monitoring screens
42+
43+
## BPF Programs
44+
45+
- **vfs_read/vfs_write** - Track file I/O operations
46+
- **__netif_receive_skb/__dev_queue_xmit** - Track network traffic
47+
- **raw_syscalls/sys_enter** - Count all syscalls
48+
49+
All programs filter by cgroup ID for per-container monitoring.
Lines changed: 221 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,221 @@
1+
"""Container Monitor - TUI-based cgroup monitoring combining syscall, file I/O, and network tracking."""
2+
3+
import time
4+
import os
5+
from pathlib import Path
6+
from pythonbpf import bpf, map, section, bpfglobal, struct, BPF
7+
from pythonbpf.maps import HashMap
8+
from pythonbpf.helper import get_current_cgroup_id
9+
from ctypes import c_int32, c_uint64, c_void_p
10+
from vmlinux import struct_pt_regs, struct_sk_buff
11+
12+
from data_collector import ContainerDataCollector
13+
from tui import ContainerMonitorTUI
14+
15+
16+
# ==================== BPF Structs ====================
17+
18+
# BPF struct with the running read totals for one map key. It is the value
# type of read_map and is written by trace_read.
@bpf
19+
@struct
20+
class read_stats:
21+
bytes: c_uint64  # running sum of the `count` values recorded by trace_read
22+
ops: c_uint64  # number of times the vfs_read probe fired for this key
23+
24+
25+
# BPF struct with the running write totals for one map key. It is the value
# type of write_map and is written by trace_write.
@bpf
26+
@struct
27+
class write_stats:
28+
bytes: c_uint64  # running sum of the `count` values recorded by trace_write
29+
ops: c_uint64  # number of times the vfs_write probe fired for this key
30+
31+
32+
# BPF struct with the network counters for one map key. It is the value type
# of net_stats_map; trace_netif_rx advances the rx_* fields and trace_dev_xmit
# advances the tx_* fields, each copying the other direction unchanged.
@bpf
33+
@struct
34+
class net_stats:
35+
rx_packets: c_uint64  # incremented by 1 per __netif_receive_skb probe hit
36+
tx_packets: c_uint64  # incremented by 1 per __dev_queue_xmit probe hit
37+
rx_bytes: c_uint64  # running sum of skb.len seen on the receive probe
38+
tx_bytes: c_uint64  # running sum of skb.len seen on the transmit probe
39+
40+
41+
# ==================== BPF Maps ====================
42+
43+
# Hash map from a c_uint64 key (trace_read uses the value returned by
# get_current_cgroup_id()) to a read_stats record; at most 1024 entries.
# NOTE(review): nothing here handles the map being full - presumably updates
# for new keys then fail silently; confirm.
@bpf
44+
@map
45+
def read_map() -> HashMap:
46+
return HashMap(key=c_uint64, value=read_stats, max_entries=1024)
47+
48+
49+
# Hash map from a c_uint64 key (trace_write uses the value returned by
# get_current_cgroup_id()) to a write_stats record; at most 1024 entries.
@bpf
50+
@map
51+
def write_map() -> HashMap:
52+
return HashMap(key=c_uint64, value=write_stats, max_entries=1024)
53+
54+
55+
# Hash map from a c_uint64 key (the network probes use the value returned by
# get_current_cgroup_id()) to a net_stats record; at most 1024 entries.
# Shared by trace_netif_rx and trace_dev_xmit.
@bpf
56+
@map
57+
def net_stats_map() -> HashMap:
58+
return HashMap(key=c_uint64, value=net_stats, max_entries=1024)
59+
60+
61+
# Hash map from a c_uint64 key (count_syscalls uses the value returned by
# get_current_cgroup_id()) to a plain c_uint64 counter; at most 1024 entries.
@bpf
62+
@map
63+
def syscall_count() -> HashMap:
64+
return HashMap(key=c_uint64, value=c_uint64, max_entries=1024)
65+
66+
67+
# ==================== File I/O Tracing ====================
68+
69+
# kprobe on vfs_read: adds one operation and `count` bytes to the read_stats
# entry keyed by the current cgroup id, creating the entry on first sight.
# Always returns 0.
#
# NOTE(review): `count` is taken from ctx.dx. Presumably that is the third
# function argument on x86-64, i.e. the size *requested* by the caller rather
# than the number of bytes actually read - confirm, and confirm the target
# architecture.
# NOTE(review): lookup followed by update is a read-modify-write on a shared
# map; concurrent hits for the same key on different CPUs could lose
# increments - confirm whether that is acceptable for a monitor.
@bpf
70+
@section("kprobe/vfs_read")
71+
def trace_read(ctx: struct_pt_regs) -> c_int32:
72+
cg = get_current_cgroup_id()
73+
count = c_uint64(ctx.dx)
74+
ptr = read_map.lookup(cg)
75+
# Existing entry: build a fresh struct with the incremented totals and
# write it back over the old value.
if ptr:
76+
s = read_stats()
77+
s.bytes = ptr.bytes + count
78+
s.ops = ptr.ops + 1
79+
read_map.update(cg, s)
80+
# First event for this key: seed the entry with this single operation.
else:
81+
s = read_stats()
82+
s.bytes = count
83+
s.ops = c_uint64(1)
84+
read_map.update(cg, s)
85+
86+
return c_int32(0)
87+
88+
89+
# kprobe on vfs_write: adds one operation and `count` bytes to the write_stats
# entry keyed by the current cgroup id, creating the entry on first sight.
# Always returns 0.
#
# NOTE(review): `count` is taken from ctx1.dx. Presumably that is the third
# function argument on x86-64, i.e. the size *requested* by the caller rather
# than the number of bytes actually written - confirm.
# NOTE(review): lookup followed by update is not atomic; concurrent hits for
# the same key could lose increments - confirm whether that is acceptable.
@bpf
90+
@section("kprobe/vfs_write")
91+
def trace_write(ctx1: struct_pt_regs) -> c_int32:
92+
cg = get_current_cgroup_id()
93+
count = c_uint64(ctx1.dx)
94+
ptr = write_map.lookup(cg)
95+
96+
# Existing entry: write back a new struct carrying the incremented totals.
if ptr:
97+
s = write_stats()
98+
s.bytes = ptr.bytes + count
99+
s.ops = ptr.ops + 1
100+
write_map.update(cg, s)
101+
# First event for this key: seed the entry with this single operation.
else:
102+
s = write_stats()
103+
s.bytes = count
104+
s.ops = c_uint64(1)
105+
write_map.update(cg, s)
106+
107+
return c_int32(0)
108+
109+
110+
# ==================== Network I/O Tracing ====================
111+
112+
# kprobe on __netif_receive_skb: adds one packet and skb.len bytes to the
# rx_* counters of the net_stats entry keyed by the current cgroup id. The
# tx_* counters are copied through unchanged (or set to 0 for a new entry).
# Always returns 0.
#
# NOTE(review): the sk_buff is taken from ctx2.di - presumably the first
# function argument on x86-64; confirm the target architecture.
# NOTE(review): packet receive processing may run outside the context of the
# task that will consume the packet, in which case get_current_cgroup_id()
# would attribute the traffic to an unrelated cgroup - confirm.
@bpf
113+
@section("kprobe/__netif_receive_skb")
114+
def trace_netif_rx(ctx2: struct_pt_regs) -> c_int32:
115+
cgroup_id = get_current_cgroup_id()
116+
skb = struct_sk_buff(ctx2.di)
117+
pkt_len = c_uint64(skb.len)
118+
119+
stats_ptr = net_stats_map.lookup(cgroup_id)
120+
121+
# Existing entry: the whole struct is rewritten, so the tx_* fields must be
# carried over explicitly to avoid zeroing them.
if stats_ptr:
122+
stats = net_stats()
123+
stats.rx_packets = stats_ptr.rx_packets + 1
124+
stats.tx_packets = stats_ptr.tx_packets
125+
stats.rx_bytes = stats_ptr.rx_bytes + pkt_len
126+
stats.tx_bytes = stats_ptr.tx_bytes
127+
net_stats_map.update(cgroup_id, stats)
128+
# First event for this key: one received packet, nothing transmitted yet.
else:
129+
stats = net_stats()
130+
stats.rx_packets = c_uint64(1)
131+
stats.tx_packets = c_uint64(0)
132+
stats.rx_bytes = pkt_len
133+
stats.tx_bytes = c_uint64(0)
134+
net_stats_map.update(cgroup_id, stats)
135+
136+
return c_int32(0)
137+
138+
139+
# kprobe on __dev_queue_xmit: adds one packet and skb.len bytes to the tx_*
# counters of the net_stats entry keyed by the current cgroup id. The rx_*
# counters are copied through unchanged (or set to 0 for a new entry).
# Always returns 0.
#
# NOTE(review): the sk_buff is taken from ctx3.di - presumably the first
# function argument on x86-64; confirm the target architecture.
# NOTE(review): lookup followed by update is not atomic, and this map is also
# written by trace_netif_rx; a concurrent receive update for the same key
# could be overwritten - confirm whether that is acceptable.
@bpf
140+
@section("kprobe/__dev_queue_xmit")
141+
def trace_dev_xmit(ctx3: struct_pt_regs) -> c_int32:
142+
cgroup_id = get_current_cgroup_id()
143+
skb = struct_sk_buff(ctx3.di)
144+
pkt_len = c_uint64(skb.len)
145+
146+
stats_ptr = net_stats_map.lookup(cgroup_id)
147+
148+
# Existing entry: the whole struct is rewritten, so the rx_* fields must be
# carried over explicitly to avoid zeroing them.
if stats_ptr:
149+
stats = net_stats()
150+
stats.rx_packets = stats_ptr.rx_packets
151+
stats.tx_packets = stats_ptr.tx_packets + 1
152+
stats.rx_bytes = stats_ptr.rx_bytes
153+
stats.tx_bytes = stats_ptr.tx_bytes + pkt_len
154+
net_stats_map.update(cgroup_id, stats)
155+
# First event for this key: one transmitted packet, nothing received yet.
else:
156+
stats = net_stats()
157+
stats.rx_packets = c_uint64(0)
158+
stats.tx_packets = c_uint64(1)
159+
stats.rx_bytes = c_uint64(0)
160+
stats.tx_bytes = pkt_len
161+
net_stats_map.update(cgroup_id, stats)
162+
163+
return c_int32(0)
164+
165+
166+
# ==================== Syscall Tracing ====================
167+
168+
# Tracepoint on raw_syscalls/sys_enter: bumps the syscall_count entry keyed by
# the current cgroup id by one, creating it with value 1 on first sight.
# The ctx argument is not used. Always returns 0.
#
# NOTE(review): `count_ptr + c_uint64(1)` operates on the lookup result
# directly; presumably pythonbpf dereferences it to the stored counter value
# before adding - confirm against the pythonbpf map semantics.
@bpf
169+
@section("tracepoint/raw_syscalls/sys_enter")
170+
def count_syscalls(ctx: c_void_p) -> c_int32:
171+
cgroup_id = get_current_cgroup_id()
172+
count_ptr = syscall_count.lookup(cgroup_id)
173+
174+
if count_ptr:
175+
new_count = count_ptr + c_uint64(1)
176+
syscall_count.update(cgroup_id, new_count)
177+
else:
178+
syscall_count.update(cgroup_id, c_uint64(1))
179+
180+
return c_int32(0)
181+
182+
183+
# BPF global that supplies the license string "GPL" for the compiled object.
# NOTE(review): presumably needed so the kernel allows the GPL-only helpers
# used by the probes above - confirm.
@bpf
184+
@bpfglobal
185+
def LICENSE() -> str:
186+
return "GPL"
187+
188+
189+
# ==================== Main ====================
190+
191+
if __name__ == "__main__":
    print("🔥 Loading BPF programs...")

    # Build the BPF object for this module, load it, then attach every program.
    bpf_obj = BPF()
    bpf_obj.load()
    bpf_obj.attach_all()

    # Fetch the four map handles by name, in the order the collector takes them.
    map_names = ("read_map", "write_map", "net_stats_map", "syscall_count")
    reads, writes, net, syscalls = (bpf_obj[name] for name in map_names)

    # Struct-valued maps are told which struct to decode their values as;
    # syscall_count stores a bare c_uint64, so it is left untouched.
    for handle, struct_name in (
        (reads, "read_stats"),
        (writes, "write_stats"),
        (net, "net_stats"),
    ):
        handle.set_value_struct(struct_name)

    print("✅ BPF programs loaded and attached")

    # Wire the maps into the data collector and hand that to the TUI, which
    # runs until the user exits.
    data_source = ContainerDataCollector(reads, writes, net, syscalls)
    monitor = ContainerMonitorTUI(data_source)
    monitor.run()

0 commit comments

Comments
 (0)