attsim/plot_propagation.py at main · ethp2p/attsim · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
#!/usr/bin/env python3
"""
Plot message propagation over time.

Plots the cumulative percentage of expected message receptions (publishers x
subscribers) logged by subscriber nodes as a function of time, showing how
messages propagate through the network.
"""

import sys
import os
import re
import argparse
from datetime import datetime
from typing import List, Tuple
import matplotlib.pyplot as plt
import json
import yaml


def parse_timestamp(timestamp_str: str) -> float:
    """Return the seconds elapsed between Shadow's clock origin and *timestamp_str*.

    The input uses the log layout ``YYYY/MM/DD HH:MM:SS.ffffff``
    (for example ``2000/01/01 00:02:00.000861``) and is measured against
    2000/01/01 00:00:00, the instant Shadow's simulated clock starts at.
    """
    simulation_origin = datetime(2000, 1, 1)
    moment = datetime.strptime(timestamp_str, "%Y/%m/%d %H:%M:%S.%f")
    return (moment - simulation_origin).total_seconds()


def parse_shadow_logs(node_count: int, topology: dict) -> List[Tuple[float, int]]:
    """
    Collect "Received message" events from the subscribers' Shadow logs.

    Every node listed under ``mesh_node_ids`` and ``mesh_attester_node_ids``
    in *topology* has its ``gossipsub.1000.stderr`` log (located below
    ``shadow.data/hosts/node<ID>/`` relative to the current working directory)
    scanned for reception lines. A missing log file produces a warning on
    stdout and is skipped.

    Args:
        node_count: Not used by the parser; kept so existing callers keep working.
        topology: Parsed topology description providing the two ID lists.

    Returns:
        List of (timestamp, cumulative_total) tuples ordered by time, where
        timestamp is seconds since simulation start and cumulative_total is
        the running number of receptions (1, 2, 3, ...).
    """
    # Compiled once rather than looked up for every log line. Matches:
    # [gossipsub-node-X] YYYY/MM/DD HH:MM:SS.microseconds main.go:206: Received message N:
    received_re = re.compile(
        r'\[gossipsub-node-\d+\] (\d{4}/\d{2}/\d{2} \d{2}:\d{2}:\d{2}\.\d+) main\.go:\d+: Received message \d+:'
    )

    reception_times = []

    # Only subscriber nodes (mesh nodes and mesh attesters) are inspected.
    for node_id in topology["mesh_node_ids"] + topology["mesh_attester_node_ids"]:
        log_file = f"shadow.data/hosts/node{node_id}/gossipsub.1000.stderr"

        if not os.path.exists(log_file):
            print(f"Warning: Log file not found: {log_file}")
            continue

        # errors='replace' keeps a stray undecodable byte elsewhere in the
        # log from aborting the whole parse; the matched part is plain ASCII.
        with open(log_file, 'r', encoding='utf-8', errors='replace') as f:
            for line in f:
                match = received_re.search(line)
                if match:
                    reception_times.append(parse_timestamp(match.group(1)))

    # Chronological order across all nodes, then number the events 1..N.
    reception_times.sort()
    return [(timestamp, total) for total, timestamp in enumerate(reception_times, start=1)]


def plot_propagation(node_count: int, output_file: str = 'message_propagation.png', topology_file: str = "",  peer_count: int = None, non_mesh_node_peer_count: int = None, simconfig_file: str = None):
    """Plot cumulative message reception over time and print summary statistics.

    Reads the simulation config (YAML) and topology (JSON), parses the
    subscribers' Shadow logs via ``parse_shadow_logs``, and saves a plot of the
    percentage of expected receptions (publishers x subscribers) against time
    in milliseconds, measured from the earliest logged reception.

    Args:
        node_count: Total number of nodes; used for labels and log parsing.
        output_file: Path the PNG plot is written to.
        topology_file: Path to the topology JSON file. It must contain
            ``mesh_node_ids``, ``mesh_attester_node_ids`` and
            ``non_mesh_attester_node_ids``.
        peer_count: Peers per node, shown in labels only (None is treated as 0).
        non_mesh_node_peer_count: Peers per non-mesh node, shown in labels only
            (None is treated as 0).
        simconfig_file: Path to the simulation config; None selects
            ``simconfig.yaml``. The ``slot_time`` key is required.

    Returns:
        None. Prints an error and returns early when no reception events are
        found or when the topology yields zero expected receptions.
    """
    if peer_count is None:
        peer_count = 0  # Default for non-random-regular topologies
    if non_mesh_node_peer_count is None:
        non_mesh_node_peer_count = 0  # Default for non-random-regular topologies
    if simconfig_file is None:
        simconfig_file = "simconfig.yaml"  # Default simconfig file

    # Read the simconfig file; safe_load yields None for an empty document,
    # so fall back to an empty mapping to keep the .get() lookups working.
    with open(simconfig_file, 'r') as f:
        simconfig = yaml.safe_load(f) or {}

    batch_interval = simconfig.get("prysm_validator", {}).get("batch_interval", 0)
    batch_verifier_time = simconfig.get("prysm_validator", {}).get("batch_verifier_time", 0)
    lighthouse_validator_time = simconfig.get("lighthouse_validator", {}).get("validator_time", 0)
    slots_to_run = simconfig.get("slots_to_run", 1)

    print(f"Prysm Batch interval: {batch_interval}ms")
    print(f"Prysm Batch verifier time: {batch_verifier_time}μs")
    print(f"Lighthouse validator time: {lighthouse_validator_time}μs")
    print(f"Slots to run: {slots_to_run}")

    # Read the topology json file
    with open(topology_file, 'r') as f:
        topology = json.load(f)

    mesh_nodes = len(topology["mesh_node_ids"])
    mesh_attester_nodes = len(topology["mesh_attester_node_ids"])
    non_mesh_attester_nodes = len(topology["non_mesh_attester_node_ids"])

    # Only shown in the info box; the time baseline comes from the logs below,
    # so no genesis-time conversion is needed.
    slot_time = simconfig["slot_time"]

    nodes_to_publish = non_mesh_attester_nodes + mesh_attester_nodes
    nodes_to_subscribe = mesh_nodes + mesh_attester_nodes

    print(f"Parsing Shadow logs for {node_count} nodes with {non_mesh_attester_nodes} Non-Mesh Attesters and {mesh_attester_nodes} Mesh Attesters...")
    print(f"Tracking propagation to {nodes_to_subscribe} Subscribers from {nodes_to_publish} Publishers...")
    if peer_count > 0:
        print(f"Network topology: random-regular with {peer_count} peers per node")

    cumulative_data = parse_shadow_logs(node_count, topology)

    if not cumulative_data:
        print("Error: No message reception events found in logs")
        return

    # Debug: show timestamp range of all events
    all_timestamps = [t for t, _ in cumulative_data]
    min_time = min(all_timestamps)
    max_time = max(all_timestamps)
    print(f"Debug: All event timestamps range from {min_time:.1f}s to {max_time:.1f}s")

    # Since messages are only logged for the last slot (see gossipsub/main.go line 520),
    # all events in the logs are from the last slot only
    # Use the earliest timestamp as the baseline (when the last slot started / messages were published)
    earliest_timestamp = min_time
    last_slot_start_time = earliest_timestamp

    print(f"All events are from the last slot (slot {slots_to_run})")
    print(f"Using earliest event at {earliest_timestamp:.1f}s as slot start (0ms baseline)")

    # Calculate metric: percentage of total expected messages received
    total_expected = nodes_to_publish * nodes_to_subscribe
    if total_expected == 0:
        # Events exist but the topology lists no publishers: a percentage is undefined.
        print("Error: Topology lists no publishers or subscribers; cannot compute percentages")
        return

    # cumulative_data already carries the running count 1..N for every event.
    percentage_received = [(count / total_expected) * 100 for _, count in cumulative_data]

    # Normalize timestamps relative to the start of the last slot
    # (0ms = earliest logged reception) and convert to milliseconds.
    timestamps = [(t - last_slot_start_time) * 1000 for t in all_timestamps]

    # Create plot
    fig = plt.figure(figsize=(10, 6))
    plt.plot(timestamps, percentage_received, linewidth=2)
    plt.xlabel('Time (ms)', fontsize=12)
    plt.ylabel('Cumulative Percentage of Messages Received (%)', fontsize=12)
    # Fixed x-axis window from 0ms to 1400ms
    plt.xlim(left=0, right=1400)

    # Create title and subtitle
    title = 'Message Arrival Time Distribution'

    # Create a concise subtitle with key network info
    subtitle = f'{node_count} nodes | {mesh_attester_nodes} Mesh Attesters | {mesh_nodes} Mesh Nodes | {non_mesh_attester_nodes} Non-Mesh Attesters'

    # Get client split info if available
    client_split = simconfig.get("client_split", {})
    prysm_pct = client_split.get("prysm", 0)
    lighthouse_pct = client_split.get("lighthouse", 0)

    # Create detailed info text box
    info_lines = [
        f'Network: {node_count} nodes ({mesh_attester_nodes} MA, {mesh_nodes} MN, {non_mesh_attester_nodes} NMA)',
        f'Clients: {prysm_pct}% Prysm, {lighthouse_pct}% Lighthouse',
        f'Prysm: {batch_interval}ms interval, {batch_verifier_time}μs verifier',
        f'Lighthouse: {lighthouse_validator_time}μs validator',
        f'Topology: {peer_count} peers/node, {non_mesh_node_peer_count} peers/non-mesh',
        f'Slot: {slots_to_run} slots × {slot_time}s'
    ]

    info_text = '\n'.join(info_lines)

    # Set title and subtitle
    plt.title(title, fontsize=14, fontweight='bold')
    plt.suptitle(subtitle, fontsize=11, y=0.96, color='gray')

    # Add info box in mid-right corner of figure
    plt.figtext(0.98, 0.5, info_text,
                fontsize=8,
                verticalalignment='center',
                horizontalalignment='right',
                bbox=dict(boxstyle='round', facecolor='wheat', alpha=0.8),
                family='monospace')

    plt.grid(True, alpha=0.3)

    # Add horizontal line at 100% completion
    plt.axhline(y=100, color='r', linestyle='--', alpha=0.5, label='100% Complete')

    # Time at which each target percentage is first reached (None if never reached)
    percentiles = [25, 50, 66, 75, 90, 95, 99]
    percentile_times = {
        p: next((t for t, pct in zip(timestamps, percentage_received) if pct >= p), None)
        for p in percentiles
    }

    # The "100th percentile" entry is the arrival time of the last logged
    # message, whether or not 100% of the expected messages were received.
    percentile_times[100] = timestamps[-1]

    # Add percentile markers to plot
    colors = ['green', 'orange', 'black', 'purple', 'brown', 'pink', 'gray']
    for p, color in zip(percentiles, colors):
        if percentile_times[p] is not None:
            plt.axvline(x=percentile_times[p], color=color, linestyle=':', alpha=0.7,
                       label=f'{p}th percentile: {percentile_times[p]:.1f}ms')

    # Mark 100th percentile (last message) with a distinct style
    plt.axvline(x=percentile_times[100], color='red', linestyle='--', linewidth=2, alpha=0.8,
               label=f'100th percentile (last msg): {percentile_times[100]:.1f}ms')

    plt.legend()
    plt.tight_layout()

    # Save plot, then release the figure so repeated calls do not accumulate figures
    plt.savefig(output_file, dpi=150)
    plt.close(fig)
    print(f"\nPlot saved to: {output_file}")

    # Print statistics
    start_time = timestamps[0]
    end_time = timestamps[-1]
    duration = end_time - start_time
    final_percentage = percentage_received[-1]

    print(f"\nPropagation Statistics (Subscribers) - Last Slot Only:")
    print(f"  Network: {node_count} nodes, {nodes_to_publish} publishers, {nodes_to_subscribe} subscribers", end="")
    if peer_count > 0:
        print(f", {peer_count} peers/node")
    else:
        print()
    print(f"  Measuring slot {slots_to_run} (last slot)")
    print(f"  Start time: {start_time:.3f}ms (relative to slot {slots_to_run} start)")
    print(f"  End time: {end_time:.3f}ms (relative to slot {slots_to_run} start)")
    print(f"  Duration: {duration:.3f}ms")
    print(f"  Final percentage: {final_percentage:.2f}% of messages received")
    print(f"  Expected: 100% of messages")
    print(f"  Total events in last slot: {len(cumulative_data)}")

    print(f"\nPercentile Times (Time to reach X% of messages):")
    for p in percentiles:
        if percentile_times[p] is not None:
            print(f"  {p:2d}th percentile: {percentile_times[p]:.3f}ms")
        else:
            print(f"  {p:2d}th percentile: Not reached")

    # Print 100th percentile separately
    print(f"  100th percentile (last message): {percentile_times[100]:.3f}ms")


def main():
    """Parse command-line arguments and generate the propagation plot.

    Exits with status 2 (through argparse) when the arguments are invalid,
    and with status 1 when no ``shadow.data`` directory exists in the current
    working directory.
    """
    parser = argparse.ArgumentParser(
        description='Plot message propagation over time from Shadow simulation logs',
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
  python3 plot_propagation.py 10 --topology-file topology.json
  python3 plot_propagation.py 20 --topology-file topology.json -o propagation_20nodes.png
  python3 plot_propagation.py 10 --topology-file topology.json --output results/test1.png
  python3 plot_propagation.py 20 --topology-file topology.json --peer-count 5 --non-mesh-node-peer-count 3 -o propagation_5p.png
        """
    )

    parser.add_argument('node_count', type=int,
                        help='Number of nodes in the simulation')
    parser.add_argument('-o', '--output', type=str,
                        default='message_propagation.png',
                        help='Output file path for the plot (default: message_propagation.png)')
    parser.add_argument('--peer-count', type=int,
                        help='Number of peers per node for random-regular topology (default: 0 for other topologies)')
    parser.add_argument('--non-mesh-node-peer-count', type=int,
                        help='Number of peers per non-mesh node for random-regular topology (default: 0 for other topologies)')
    # The topology file has no usable default, so argparse enforces it.
    parser.add_argument('--topology-file', type=str, required=True,
                        help='Topology JSON file path (required)')
    parser.add_argument('--simconfig-file', type=str,
                        help='Simulation config file path (default: simconfig.yaml)')

    args = parser.parse_args()

    if args.node_count <= 0:
        parser.error("node-count must be positive")

    # The logs are read relative to the current working directory.
    if not os.path.exists("shadow.data"):
        print("Error: shadow.data directory not found. Run simulation first with 'make run-sim'")
        sys.exit(1)

    plot_propagation(args.node_count, args.output, args.topology_file, args.peer_count, args.non_mesh_node_peer_count, args.simconfig_file)


# Run the command-line interface only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()