diff --git a/.flake8 b/.flake8
new file mode 100644
index 0000000..a4959c5
--- /dev/null
+++ b/.flake8
@@ -0,0 +1,2 @@
+[flake8]
+ignore = E501, E722
diff --git a/.gitignore b/.gitignore
index 0a19790..0a579d0 100644
--- a/.gitignore
+++ b/.gitignore
@@ -6,6 +6,12 @@ __pycache__/
 # C extensions
 *.so
+# JSON files
+*.json
+
+# PDF files
+*.pdf
+
 # Distribution / packaging
 .Python
 build/
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
new file mode 100644
index 0000000..1d029bf
--- /dev/null
+++ b/.pre-commit-config.yaml
@@ -0,0 +1,19 @@
+repos:
+  - repo: https://github.com/pre-commit/pre-commit-hooks
+    rev: v4.4.0
+    hooks:
+      - id: trailing-whitespace
+      - id: end-of-file-fixer
+      - id: check-yaml
+
+  - repo: https://github.com/PyCQA/flake8
+    rev: 7.0.0
+    hooks:
+      - id: flake8
+        args:
+          - "--config=.flake8"
+
+  - repo: https://github.com/pycqa/isort
+    rev: 5.13.2
+    hooks:
+      - id: isort
diff --git a/.vscode/ltex.dictionary.en-US.txt b/.vscode/ltex.dictionary.en-US.txt
new file mode 100644
index 0000000..2f7628b
--- /dev/null
+++ b/.vscode/ltex.dictionary.en-US.txt
@@ -0,0 +1,2 @@
+AhoCorasickParser
+NodeLogsChecker
diff --git a/README.md b/README.md
index 62ac2cf..79f81b4 100644
--- a/README.md
+++ b/README.md
@@ -1,2 +1,61 @@
 # mx-chain-logs-parser
-Logs parsing utilities.
+Logs parsing utilities and applications.
+
+## LOGS PARSER TOOLS
+The tool provides general abstract classes that are useful for parsing logs.
+To create an application that parses log files off-line, inherit these classes and implement their methods for the particular use case (see the sketch at the end of this README).
+
+### ARCHIVE HANDLER
+- General application-level processing class that loops through the nodes in the downloaded logs archive and calls its NodeLogsChecker instance for each of them
+- run-level methods should be implemented in inheriting classes
+
+### NODE LOGS CHECKER
+- Node-level processing that loops through the individual log files of a node and calls its AhoCorasickParser instance to search for entries containing pre-defined key phrases
+- node-level methods should be implemented in inheriting classes
+
+### AHO-CORASICK PARSER
+- Log-level processing implementing the Aho-Corasick algorithm, which searches for a list of given keywords simultaneously. It uses an *EntryParser* to extract information from the entries of interest
+
+### ENTRY PARSER
+- Entry-level processing that splits a log entry into its basic components: log level, context, message, parameters
+- can be extended with regular-expression recognition to handle specific cases
+
+
+## CROSS SHARD ANALYSIS TOOL
+This tool validates that cross-shard mini-blocks are executed (and proposed) in strict order, without gaps or duplications.
+It uses color-coded data to illustrate each state in the processing. A configuration file (issues.py) is provided to signal certain issues with mini-block production.
+
+The tool creates a run-specific folder under *Reports* that includes the parsed headers in the *Shards* subfolder and the mini-blocks in the *Miniblocks* subfolder.
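+
+For orientation, a run produces a layout along these lines (folder and file names as created by the current code; `<run-name>` is the name of the zip file provided):
+```
+Reports/
+└── <run-name>/
+    ├── Shards/                    # <shard_id>_report.json for each shard
+    ├── Miniblocks/                # miniblocks_report.json
+    ├── MiniblocksShardTimeline/   # shards_timeline_report_<epoch>.pdf
+    ├── MiniblocksTimelineDetail/  # miniblock_timeline_report_epoch_<epoch>.pdf
+    ├── NonceTimeline/             # nonce_timeline_report_<epoch>.pdf
+    └── NonceAlarms/               # nonce_alarms_report_<epoch>.pdf
+```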
+The generated reports will also be included in this folder, in individual sub-folders named after the respective report:
+- **MiniblocksShardTimeline** contains a report that goes through rounds and displays which mini-blocks were proposed, executed or notarized for each shard; individual PDF files are generated for each epoch;
+- **MiniblocksTimelineDetail** produces a timeline of mini-blocks for each shard; the type of mini-block and other information is included for each of them;
+- **NonceTimeline** produces a timeline of the processed headers originating from each shard; alarms, such as round gaps or missing destinations, are represented by colored borders;
+- **NonceAlarms** is similar to the NonceTimeline report, but only includes headers that have issues. The report is divided into chapters for each type of alarm. A header may be included in more than one such category, depending on its characteristics.
+
+## INSTALL
+Create a virtual environment and install the dependencies:
+
+```
+python3 -m venv ./venv
+source ./venv/bin/activate
+pip install -r ./requirements.txt --upgrade
+export PYTHONPATH=.
+```
+
+## INSTALL DEVELOPMENT DEPENDENCIES
+```
+pip install -r ./requirements-dev.txt --upgrade
+```
+
+## EXAMPLE USAGE
+```
+python -m multiversx_cross_shard_analysis.gather_data --path ~/Downloads/cross-shard-execution-anal-9afe696daf.zip
+```
+where the mandatory --path argument is the path to the zip file containing the logs.
+The command will also generate all available reports, saving them inside a subfolder of Reports with the same name as the provided zip file.
+
+To run a specific report from an existing run folder:
+```
+python -m multiversx_cross_shard_analysis.headers_timeline_report --run-name cross-shard-execution-anal-6cc663f7af
+```
+where --run-name is the name of the subfolder inside *Reports* where the run's files reside.
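+
+## EXTENDING THE PARSER TOOLS
+As a reference, below is a minimal sketch of how the abstract classes can be inherited, modeled on the HeaderAnalysisParser added in this change. The class name MyParser and the committed_count attribute are hypothetical; the overridden methods mirror the ones implemented there.
+```
+from typing import Any
+
+from multiversx_logs_parser_tools.aho_corasik_parser import AhoCorasickParser
+
+
+class MyParser(AhoCorasickParser):
+    """Hypothetical parser that counts 'Proposed header committed' entries."""
+
+    def get_patterns(self) -> list[tuple[str, int]]:
+        # each tuple pairs a key phrase with the pattern index that
+        # process_match receives when the phrase is found in a line
+        return [('Proposed header committed', 0)]
+
+    def initialize_checker(self) -> None:
+        # reset parser state; the checker calls this after collecting results
+        self.committed_count = 0
+
+    def process_match(self, line: str, end_index: int, pattern_idx: int, args: dict[str, str]) -> dict[str, Any]:
+        # let the base class split the entry, then count the match
+        parsed = super().process_match(line, end_index, pattern_idx, args)
+        if pattern_idx == 0:
+            self.committed_count += 1
+        return parsed
+
+    def process_parsed_entry(self, parsed_entry: dict[str, Any], args: dict[str, str]) -> None:
+        pass
+
+    def should_parse_line(self, pattern) -> bool:
+        return True
+```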
\ No newline at end of file diff --git a/multiversx_cross_shard_analysis/__init__.py b/multiversx_cross_shard_analysis/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/multiversx_cross_shard_analysis/constants.py b/multiversx_cross_shard_analysis/constants.py new file mode 100644 index 0000000..a34da72 --- /dev/null +++ b/multiversx_cross_shard_analysis/constants.py @@ -0,0 +1,121 @@ +from enum import Enum + +from reportlab.lib import colors + +origin_shard = "origin_shard" +dest_shard = "dest_shard" +meta = "meta" +proposed = "proposed" +committed = "committed" + +MiniBlockTypes = Enum("MiniBlockType", [ + 'MiniBlockHeaders', + 'ShardInfo', + 'ExecutionResults' +]) + +MentionType = Enum("MentionType", [ + # miniblock is mentioned in origin shard header + "origin_shard_proposed", + "origin_shard_committed", + + # miniblock is mentioned in an execution result, either on origin or destination shard + "origin_exec_proposed", + "origin_exec_committed", + + # notarization of shard miniblock when meta includes the shard header + "meta_origin_shard_proposed", + "meta_origin_shard_committed", + + # miniblock is mentioned in destination shard header + "dest_shard_proposed", + "dest_shard_committed", + + # miniblock is mentioned in an execution result, either on origin or destination shard + "dest_exec_proposed", + "dest_exec_committed", + + # notarization of shard miniblock when meta includes the shard header + "meta_dest_shard_proposed", + "meta_dest_shard_committed", + + # notarization of execution results when meta includes the header containing the execution result for origin shard + "meta_origin_exec_proposed", + "meta_origin_exec_committed", + + # notarization of execution results when meta includes the header containing the execution result for destination shard + "meta_dest_exec_proposed", + "meta_dest_exec_committed", +]) + + +# Mappings from field number to field name for MiniBlockHeaderReserved +FIELD_NAME_MAPPING = { + 1: "ExecutionType", + 2: "State", + 3: "IndexOfFirstTxProcessed", + 4: "IndexOfLastTxProcessed", +} + +# Mappings for enum values from block.proto +PROCESSING_TYPE_MAPPING = { + 0: "Normal", + 1: "Scheduled", + 2: "Processed", +} + +# Mappings for miniblock state enum values from block.proto +MINIBLOCK_STATE_MAPPING = { + 0: "Final", + 1: "Proposed", + 2: "PartialExecuted", +} + +# type names +TYPE_NAMES = { + 0: "TxBlock", + 30: "StateBlock", + 60: "PeerBlock", + 90: "SCResultBlock", + 120: "InvalidBlock", + 150: "ReceiptBlock", + 255: "RewardsBlock", +} + +Colors = Enum("Colors", [ + "origin_proposed", + "origin_partial_executed", + "origin_final", + "dest_proposed", + "dest_partial_executed", + "dest_final", + "meta_origin_committed", + "meta_dest_committed", + "origin_exec_proposed", + "origin_exec_partial_executed", + "origin_exec_final", + "dest_exec_proposed", + "dest_exec_partial_executed", + "dest_exec_final", + "meta_origin_exec_committed", + "meta_dest_exec_committed", +]) + +COLORS_MAPPING = { + Colors.origin_proposed: colors.lightyellow, + Colors.origin_partial_executed: colors.orange, + Colors.origin_final: colors.yellow, + Colors.dest_proposed: colors.mistyrose, + Colors.dest_partial_executed: colors.palevioletred, + Colors.dest_final: colors.pink, + Colors.meta_origin_committed: colors.lightgreen, + Colors.meta_dest_committed: colors.lightblue, + Colors.origin_exec_proposed: colors.khaki, + Colors.origin_exec_partial_executed: colors.gold, + Colors.origin_exec_final: colors.yellow, + Colors.dest_exec_proposed: colors.lightcoral, + 
Colors.dest_exec_partial_executed: colors.crimson, + Colors.dest_exec_final: colors.pink, + Colors.meta_origin_exec_committed: colors.mediumseagreen, + Colors.meta_dest_exec_committed: colors.cornflowerblue, +} diff --git a/multiversx_cross_shard_analysis/decode_reserved.py b/multiversx_cross_shard_analysis/decode_reserved.py new file mode 100644 index 0000000..21908d3 --- /dev/null +++ b/multiversx_cross_shard_analysis/decode_reserved.py @@ -0,0 +1,66 @@ + +from typing import Any + +from multiversx_cross_shard_analysis.constants import (FIELD_NAME_MAPPING, + MINIBLOCK_STATE_MAPPING, + PROCESSING_TYPE_MAPPING) + + +def get_default_decoded_data(tx_count: int) -> dict[str, Any]: + """ + Returns a dictionary with the default values for the MiniBlockHeaderReserved struct. + """ + return { + "ExecutionType": "Normal", + "State": "Final", + "IndexOfFirstTxProcessed": 0, + "IndexOfLastTxProcessed": tx_count - 1 if tx_count > 0 else 0, + } + + +def decode_reserved_field(hex_string: str, tx_count: int) -> dict[str, Any]: + """ + Decodes the reserved field from a hex string into a human-readable dictionary, + including default values for missing fields. + """ + decoded_data = get_default_decoded_data(tx_count) + + if not hex_string: + return {} + + byte_data = bytes.fromhex(hex_string) + i = 0 + while i < len(byte_data): + field_and_type = byte_data[i] + field_number = field_and_type >> 3 + wire_type = field_and_type & 0x07 + i += 1 + + if wire_type == 0: # Varint + value = 0 + shift = 0 + while True: + if i >= len(byte_data): + decoded_data["error"] = "Incomplete varint data" + return decoded_data + byte = byte_data[i] + value |= (byte & 0x7F) << shift + i += 1 + if not (byte & 0x80): + break + shift += 7 + + field_name = FIELD_NAME_MAPPING.get(field_number, f"UnknownField_{field_number}") + + if field_name == "ExecutionType": + decoded_data[field_name] = PROCESSING_TYPE_MAPPING.get(value, f"UnknownProcessingType_{value}") + elif field_name == "State": + decoded_data[field_name] = MINIBLOCK_STATE_MAPPING.get(value, f"UnknownState_{value}") + else: + decoded_data[field_name] = value + + else: + decoded_data["error"] = f"Unsupported wire type: {wire_type}" + break + + return decoded_data diff --git a/multiversx_cross_shard_analysis/gather_data.py b/multiversx_cross_shard_analysis/gather_data.py new file mode 100644 index 0000000..5f46226 --- /dev/null +++ b/multiversx_cross_shard_analysis/gather_data.py @@ -0,0 +1,76 @@ +import os +from datetime import datetime, timedelta + +from multiversx_cross_shard_analysis.headers_timeline_report import \ + build_nonce_timeline_pdf +from multiversx_cross_shard_analysis.miniblock_data import MiniblockData +from multiversx_cross_shard_analysis.miniblocks_round_report import \ + build_report +from multiversx_cross_shard_analysis.miniblocks_timeline_report import \ + build_pdf_from_miniblocks + +from .headers_alarms_report import build_nonce_alarms_timeline_pdf +from .header_analysis_archive_handler import HeaderAnalysisArchiveHandler +from .header_analysis_checker import HeaderAnalysisChecker +from .header_analysis_parser import HeaderAnalysisParser + + +def gather_data(): + time_started = datetime.now() + print('Starting cross-shard analysis...') + args = HeaderAnalysisArchiveHandler.get_path() + header_checker = HeaderAnalysisChecker(HeaderAnalysisParser, args) + handler = HeaderAnalysisArchiveHandler(header_checker, args.path) + handler.handle_logs() + print(f'Archive checked successfully: {timedelta(seconds=(datetime.now() - time_started).total_seconds())}s') + 
+ # Generate reports + mb_data = MiniblockData(handler.shard_data.miniblocks).get_data_for_round_report() + out_folder = os.path.join(handler.run_name, "MiniblocksShardTimeline") + out_folder = os.path.join('Reports', out_folder) + os.makedirs(out_folder, exist_ok=True) + + # generate PDFs per epoch + for epoch in sorted(mb_data.keys()): + print(f"Epoch: {epoch}") + report_dict = mb_data[epoch] + outfile = os.path.join(out_folder, f"shards_timeline_report_{epoch}.pdf") + build_report(int(epoch), report_dict, shards=[0, 1, 2, 4294967295], outname=outfile) + print("→", outfile) + + mb_data = MiniblockData(handler.shard_data.miniblocks).get_data_for_detail_report() + out_folder = os.path.join(handler.run_name, "MiniblocksTimelineDetail") + out_folder = os.path.join('Reports', out_folder) + os.makedirs(out_folder, exist_ok=True) + + for epoch in sorted(mb_data.keys()): + print(f"Epoch: {epoch}") + outfile = os.path.join(out_folder, f"miniblock_timeline_report_epoch_{epoch}.pdf") + build_pdf_from_miniblocks(int(epoch), mb_data[epoch], outname=outfile) + print("→", outfile) + + input_data, nonce_alarms = MiniblockData(handler.shard_data.miniblocks).get_data_for_header_report() + out_folder = os.path.join(handler.run_name, "NonceTimeline") + out_folder = os.path.join('Reports', out_folder) + os.makedirs(out_folder, exist_ok=True) + + for epoch in sorted(input_data.keys()): + print(f"Epoch: {epoch}") + outfile = os.path.join(out_folder, f"nonce_timeline_report_{epoch}.pdf") + build_nonce_timeline_pdf(input_data[epoch], nonce_alarms, outname=outfile) + print("→", outfile) + + input_data = MiniblockData(handler.shard_data.miniblocks).get_data_for_header_alarms_report() + out_folder = os.path.join(handler.run_name, "NonceAlarms") + out_folder = os.path.join('Reports', out_folder) + os.makedirs(out_folder, exist_ok=True) + + for epoch in sorted(input_data.keys()): + print(f"Epoch: {epoch}") + outfile = os.path.join(out_folder, f"nonce_alarms_report_{epoch}.pdf") + build_nonce_alarms_timeline_pdf(input_data[epoch], outname=outfile) + print("→", outfile) + + +if __name__ == "__main__": + gather_data() diff --git a/multiversx_cross_shard_analysis/header_analysis_archive_handler.py b/multiversx_cross_shard_analysis/header_analysis_archive_handler.py new file mode 100644 index 0000000..c0be1f4 --- /dev/null +++ b/multiversx_cross_shard_analysis/header_analysis_archive_handler.py @@ -0,0 +1,50 @@ +import json +from pathlib import Path + +from multiversx_logs_parser_tools.archive_handler import ArchiveHandler + +from .header_analysis_checker import HeaderAnalysisChecker +from .header_structures import HeaderData, ShardData + + +class HeaderAnalysisArchiveHandler(ArchiveHandler): + def __init__(self, checker: HeaderAnalysisChecker, logs_path: str): + self.checker = checker + self.shard_data = ShardData() + + super().__init__(checker, logs_path) + + def process_node_data(self): + """Process the parsed data for a single node.""" + node_data = HeaderData() + node_data.header_dictionary = self.checker.parsed + self.shard_data.add_node(node_data) + + def process_run_data(self): + """Process the parsed data for the entire run.""" + self.write_run_json() + + def write_run_json(self, path=''): + for shard_id, header_data in self.shard_data.parsed_headers.items(): + run_data = { + "run_name": self.run_name, + "shard_id": shard_id, + "shards": header_data.header_dictionary + } + shard_reports_path = f'./Reports/{self.run_name}/Shards' + output_file = Path(f'{shard_reports_path}/{shard_id}_report.json') + directory = 
output_file.parent
+            directory.mkdir(parents=True, exist_ok=True)
+            with open(output_file, 'w') as f:
+                json.dump(run_data, f, indent=4)
+            print(f"Shard data for shard {shard_id} written to {output_file}")
+        miniblocks_reports_path = f'./Reports/{self.run_name}/Miniblocks'
+        output_file = Path(f'{miniblocks_reports_path}/miniblocks_report.json')
+        directory = output_file.parent
+        directory.mkdir(parents=True, exist_ok=True)
+        with open(output_file, 'w') as f:
+            json.dump({
+                "run_name": self.run_name,
+                "miniblocks": self.shard_data.miniblocks
+            }, f, indent=4)
+        print(f"Miniblock data written to {output_file}")
diff --git a/multiversx_cross_shard_analysis/header_analysis_checker.py b/multiversx_cross_shard_analysis/header_analysis_checker.py
new file mode 100644
index 0000000..ac3e606
--- /dev/null
+++ b/multiversx_cross_shard_analysis/header_analysis_checker.py
@@ -0,0 +1,35 @@
+from argparse import Namespace
+from typing import Any
+
+from multiversx_logs_parser_tools.node_logs_checker import NodeLogsChecker
+
+from .header_analysis_parser import HeaderAnalysisParser
+from .header_structures import HeaderData
+
+
+class HeaderAnalysisChecker(NodeLogsChecker):
+    def __init__(self, parser_cls: type[HeaderAnalysisParser], args: Namespace):
+        super().__init__(parser_cls, args)
+
+    def initialize_checker(self, args):
+        self.parsed = HeaderData().header_dictionary
+        return super().initialize_checker(args)
+
+    def process_parsed_result(self):
+        self.parsed = self.parser.parsed_headers.header_dictionary.copy()
+        self.parser.initialize_checker()
+
+    def post_process_node_logs(self):
+        # Implement post-processing logic here
+        self.write_node_json()
+
+    def create_json_for_node(self) -> dict[str, Any]:
+        return {
+            "node_name": self.node_name,
+            "run_name": self.run_name,
+            "header_analysis": self.parsed
+        }
+
+    def reset_node(self, args: Namespace):
+        super().reset_node(args)
+        self.parsed = HeaderData().header_dictionary.copy()
diff --git a/multiversx_cross_shard_analysis/header_analysis_parser.py b/multiversx_cross_shard_analysis/header_analysis_parser.py
new file mode 100644
index 0000000..a529189
--- /dev/null
+++ b/multiversx_cross_shard_analysis/header_analysis_parser.py
@@ -0,0 +1,45 @@
+import json
+from re import Pattern
+from typing import Any
+
+from multiversx_logs_parser_tools.aho_corasik_parser import AhoCorasickParser
+
+from .header_structures import HeaderData
+
+
+class HeaderAnalysisParser(AhoCorasickParser):
+    def __init__(self):
+        self.parsed_headers = HeaderData()
+        super().__init__()
+
+    def get_patterns(self) -> list[tuple[str, int]]:
+        patterns = []
+        patterns.append(('Proposed header received', 0))
+        patterns.append(('Proposed header sent', 1))
+        patterns.append(('Proposed header committed', 2))
+        return patterns
+
+    def initialize_checker(self) -> None:
+        # Initialize any required state or variables for the checker
+        self.parsed_headers.reset()
+
+    def process_match(self, line: str, end_index: int, pattern_idx: int, args: dict[str, str]) -> dict[str, Any]:
+        parsed = super().process_match(line, end_index, pattern_idx, args)
+        # Additional processing specific to header checking can be added here
+        if pattern_idx < 3 and 'parameters' in parsed:
+            parameter = parsed.pop('parameters').split(' = ', 1)[1]
+            header = json.loads(parameter)
+            if pattern_idx < 2:
+                self.parsed_headers.add_proposed_header(header)
+            elif pattern_idx == 2:
+                self.parsed_headers.add_committed_header(header)
+
+        return {}
+
+    def process_parsed_entry(self, parsed_entry: dict[str, Any], args:
dict[str, str]) -> None: + # Process the parsed log entry specific to header checking + pass + + def should_parse_line(self, pattern: Pattern[str]) -> bool: + # Determine if the line should be parsed based on the pattern + return True diff --git a/multiversx_cross_shard_analysis/header_structures.py b/multiversx_cross_shard_analysis/header_structures.py new file mode 100644 index 0000000..1255da3 --- /dev/null +++ b/multiversx_cross_shard_analysis/header_structures.py @@ -0,0 +1,207 @@ + +from typing import Any + +from multiversx_cross_shard_analysis.miniblock_data import MiniblockData +from multiversx_cross_shard_analysis.decode_reserved import decode_reserved_field + +from .constants import (COLORS_MAPPING, TYPE_NAMES, dest_shard, meta, + origin_shard) + + +def get_value(variable_name: str, header: dict[str, Any]) -> Any: + return header['header'][variable_name] if 'header' in header else header[variable_name] + + +def get_shard_id(header: dict[str, Any]) -> int: + return header['header']['shardID'] if 'header' in header else header.get('shardID', 4294967295) + + +class HeaderData: + def __init__(self): + self.header_dictionary = { + 'proposed_headers': [], + 'committed_headers': [] + } + self.seen_headers: dict[str, set[str]] = {'proposed_headers': set(), + 'committed_headers': set()} + + def reset(self): + self.header_dictionary = { + 'proposed_headers': [], + 'committed_headers': [] + } + self.seen_headers: dict[str, set[str]] = {'proposed_headers': set(), + 'committed_headers': set()} + + def add_proposed_header(self, header: dict[str, Any]) -> bool: + nonce = get_value('nonce', header) + if nonce in self.seen_headers['proposed_headers']: + return False + self.header_dictionary['proposed_headers'].append(header) + self.seen_headers['proposed_headers'].add(nonce) + return True + + def add_committed_header(self, header: dict[str, Any]) -> bool: + nonce = get_value('nonce', header) + if nonce in self.seen_headers['committed_headers']: + return False + self.header_dictionary['committed_headers'].append(header) + self.seen_headers['committed_headers'].add(nonce) + return True + + +class ShardData: + def __init__(self): + self.parsed_headers = {0: HeaderData(), 1: HeaderData(), 2: HeaderData(), 4294967295: HeaderData()} + self.miniblocks = {} + self.seen_miniblock_hashes = set() + + def add_node(self, node_data: HeaderData): + if node_data.header_dictionary['committed_headers'] == []: + node_data.header_dictionary['committed_headers'] = node_data.header_dictionary['proposed_headers'].copy() + for header_status in node_data.header_dictionary.keys(): + for header in node_data.header_dictionary[header_status]: + shard_id = get_shard_id(header) + added = False + if header_status == 'committed_headers': + added = self.parsed_headers[shard_id].add_committed_header(header) + elif header_status == 'proposed_headers': + added = self.parsed_headers[shard_id].add_proposed_header(header) + else: + print(f"Warning: Unknown header status {header_status} in header: round = {get_value('round', header)}, nonce = {get_value('nonce', header)}") + if added: + self.add_miniblocks(header, header_status) + + def add_miniblocks(self, header: dict[str, Any], status: str): + header_struct = Header(header, status) + + for mention_type, mb, metadata in header_struct.miniblocks: + mb_hash = mb.get('hash') + if mb_hash not in self.seen_miniblock_hashes: + self.seen_miniblock_hashes.add(mb_hash) + self.miniblocks[mb_hash] = mb.copy() + self.miniblocks[mb_hash]['mentioned'] = [] + # metadata = 
header_struct.metadata.copy()
+            metadata["reserved"] = decode_reserved_field(mb.get("reserved", ""), mb.get("txCount", 0))
+            self.miniblocks[mb_hash]['mentioned'].append((mention_type, metadata))
+
+    def get_data_for_header_horizontal_report(self) -> dict[str, dict[int, Any]]:
+        miniblocks = MiniblockData(self.miniblocks)
+        report: dict[str, dict[int, Any]] = {}
+
+        for shard_id, header_data in self.parsed_headers.items():
+
+            for header in sorted(header_data.header_dictionary['committed_headers'],
+                                 key=lambda x: get_value('nonce', x)):
+
+                epoch = get_value('epoch', header)
+
+                # ensure epoch entry exists and contains all shards as keys
+                if epoch not in report:
+                    report[epoch] = {sid: {} for sid in self.parsed_headers.keys()}
+
+                if get_value('miniBlockHeaders', header) == []:
+                    continue
+
+                nonce = get_value('nonce', header)
+                round_num = get_value('round', header)
+
+                # build result for this header (only cross-shard miniblocks)
+                result: dict[int, list] = {}
+                for miniblock in [mb for mb in get_value('miniBlockHeaders', header) if mb.get('senderShardID') == shard_id]:
+                    mb_hash = miniblock.get('hash')
+                    for mention_type, metadata in self.miniblocks[mb_hash]['mentioned']:
+                        # skip proposed mentions
+                        if 'proposed' in mention_type:
+                            continue
+
+                        rn = metadata['round']
+                        color = miniblocks.get_color_for_state(mention_type, miniblock['txCount'], metadata)
+                        label = f'Shard {metadata["shard_id"]}' if metadata["shard_id"] != 4294967295 else "MetaShard"
+                        if miniblock['type'] != 0:
+                            label += f' ({TYPE_NAMES[miniblock["type"]]})'
+                        # append tuple (label, info, color)
+                        result.setdefault(rn, []).append((label, mb_hash[:15] + '...', COLORS_MAPPING[color]))
+
+                # if result empty -> we don't include this nonce at all, don't count it
+                if not result:
+                    continue
+
+                # sort rounds in result
+                result = dict(sorted(result.items()))
+
+                # previous nonce round check - skip if miniblock from a previous nonce
+                if list(result.keys())[0] < round_num:
+                    continue
+
+                # store the nonce's data
+                report[epoch][shard_id][nonce] = result
+
+        return report
+
+
+class Header:
+    def __init__(self, header: dict[str, Any], status: str):
+        self.metadata: dict[str, Any] = self.get_header_metadata(header)
+        self.miniblocks: list[tuple[str, dict[str, Any], dict[str, Any]]] = self.get_miniblocks(header, status)
+
+    # returns 'origin_shard' or 'dest_shard' based on miniblock senderShardID
+    def get_miniblock_shard_type(self, miniblock_shard_id: int) -> str:
+        return 'origin_shard' if self.metadata["shard_id"] == miniblock_shard_id else 'dest_shard'
+
+    def get_header_metadata(self, header: dict[str, Any]) -> dict[str, Any]:
+        if Header.isHeaderV2(header):
+            header = header['header']
+        return {
+            "nonce": header.get('nonce', 0),
+            "round": header.get('round', 0),
+            "epoch": header.get('epoch', 0),
+            "shard_id": header.get('shardID', 4294967295),
+        }
+
+    def get_miniblocks(self, header: dict[str, Any], status: str) -> list[tuple[str, dict[str, Any], dict[str, Any]]]:
+        miniblocks = []
+        if Header.isHeaderV2(header):
+            header = header['header']
+        for miniblock in header.get('miniBlockHeaders', []):
+            miniblock_mention = self.get_miniblock_shard_type(miniblock["senderShardID"]) + f'_{status}'
+            miniblocks.append((miniblock_mention, miniblock, self.metadata.copy()))
+        if Header.isMetaHeader(header):
+            for shard_header in header['shardInfo']:
+                shard_metadata = self.get_header_metadata(shard_header)
+                for miniblock in shard_header.get('shardMiniBlockHeaders', []):
+                    miniblock_mention = f'{meta}_{origin_shard if shard_metadata["shard_id"] == miniblock["senderShardID"] else dest_shard}_{status}'
+                    miniblocks.append((miniblock_mention, miniblock, self.metadata.copy()))
+                if Header.isMetaHeaderV3(header):
+                    for exec_result in shard_header.get('executionResults', []):
+                        for miniblock in exec_result.get('miniBlockHeaders', []):
+                            miniblock_mention = f'{meta}_{origin_shard if shard_metadata["shard_id"] == miniblock["senderShardID"] else dest_shard}_exec_{status}'
+                            miniblocks.append((miniblock_mention, miniblock, self.metadata.copy()))
+        if Header.isHeaderV3(header) or Header.isMetaHeaderV3(header):
+            for exec_result in header['executionResults']:
+                base_exec_result = exec_result.get('baseExecutionResult', {})
+                exec_result_metadata = self.metadata.copy()
+                exec_result_metadata['nonce'] = base_exec_result.get('headerNonce', 0)
+
+                for miniblock in exec_result.get('miniBlockHeaders', []):
+                    miniblock_mention = self.get_miniblock_shard_type(miniblock["senderShardID"]) + f'_{status}_exec'
+                    exec_result_metadata['exec_result_hash'] = miniblock.get('hash', '')
+                    miniblocks.append((miniblock_mention, miniblock, exec_result_metadata.copy()))
+
+        return miniblocks
+
+    @staticmethod
+    def isHeaderV2(header: dict[str, Any]) -> bool:
+        return 'header' in header
+
+    @staticmethod
+    def isMetaHeader(header: dict[str, Any]) -> bool:
+        return 'shardInfo' in header
+
+    @staticmethod
+    def isHeaderV3(header: dict[str, Any]) -> bool:
+        return 'executionResults' in header and 'shardInfoProposal' not in header
+
+    @staticmethod
+    def isMetaHeaderV3(header: dict[str, Any]) -> bool:
+        return 'shardInfoProposal' in header
diff --git a/multiversx_cross_shard_analysis/headers_alarms_report.py b/multiversx_cross_shard_analysis/headers_alarms_report.py
new file mode 100644
index 0000000..06b34e9
--- /dev/null
+++ b/multiversx_cross_shard_analysis/headers_alarms_report.py
@@ -0,0 +1,338 @@
+import argparse
+import json
+import os
+import sys
+from typing import Any
+
+from reportlab.graphics.shapes import Drawing, Rect, String
+from reportlab.lib import colors
+from reportlab.lib.pagesizes import A4
+from reportlab.lib.styles import getSampleStyleSheet
+from reportlab.platypus import (Flowable, LongTable, PageBreak, Paragraph,
+                                SimpleDocTemplate, Spacer, TableStyle)
+
+from multiversx_cross_shard_analysis.constants import COLORS_MAPPING, Colors
+from multiversx_cross_shard_analysis.header_structures import (HeaderData,
+                                                               ShardData)
+from multiversx_cross_shard_analysis.miniblock_data import MiniblockData
+
+# -----------------------------
+# CONFIG (mirrors miniblock report)
+# -----------------------------
+
+PAGE_WIDTH, PAGE_HEIGHT = A4
+LEFT_MARGIN = RIGHT_MARGIN = 20
+TOP_MARGIN = BOTTOM_MARGIN = 20
+
+RECT_H = 20
+RECT_PADDING_X = 4
+ROUND_HEADER_FONT = 7
+RECT_LABEL_FONT = 8
+RECT_INFO_FONT = 8
+
+SECTION_BASE_HEIGHT = 110  # same idea as miniblock
+EXTRA_LINE_HEIGHT = 18  # additional rows per stack
+TITLE_HEIGHT = 60
+
+
+# -----------------------------
+# build stacked rectangles (same as miniblock version)
+# -----------------------------
+
+def build_stack_rows(items: list[tuple[str, str, colors.Color]], col_width: float) -> list[Drawing]:
+    """
+    Instead of one giant Drawing, we return a list of small ones.
+    Each drawing represents one row in the vertical stack.
+ """ + row_drawings = [] + + if len(items) == 0: + # Create a single "no data" row + d = Drawing(col_width, RECT_H) + rect_w = max(2, col_width - RECT_PADDING_X * 2) - 4 + d.add(Rect(0, 2, rect_w, 12, fillColor=colors.whitesmoke, strokeColor=colors.grey)) # type: ignore + d.add(String(RECT_PADDING_X + 2, 6, "no data", fontSize=RECT_LABEL_FONT)) + row_drawings.append(d) + return row_drawings + + for label, info, col in items: + # Create a small drawing for just this one item + d = Drawing(col_width, RECT_H) + rect_w = max(2, col_width - RECT_PADDING_X * 2) - 4 + + d.add(Rect(0, 2, rect_w, RECT_H - 4, fillColor=col, strokeColor=colors.black)) # type: ignore + + text_x = RECT_PADDING_X + 3 + d.add(String(text_x, 12, label, fontSize=RECT_LABEL_FONT)) + d.add(String(text_x, 4, info, fontSize=RECT_INFO_FONT)) + row_drawings.append(d) + + return row_drawings + +# ----------------------------- +# check for round gaps +# ----------------------------- + + +def has_round_gap(rounds: list[int]) -> bool: + if len(rounds) < 2: + return False + rounds_sorted = sorted(rounds) + for a, b in zip(rounds_sorted, rounds_sorted[1:]): + if b != a + 1: + return True + return False + + +# ----------------------------- +# build section for one nonce +# ----------------------------- + +def build_nonce_section(shard_id: int, nonce: int, rounds: list[int], data: dict[int, list[Any]], + usable_width: float, highlight: bool = False) -> list[Flowable]: + + flow = [] + styles = getSampleStyleSheet() + + flow.append(Paragraph(f"Shard {shard_id} — Nonce {nonce}", styles["Heading3"])) + flow.append(Spacer(1, 4)) + + num_cols = len(rounds) + col_width = usable_width / max(1, num_cols) + + # 1. Build the Header Row + header = [Paragraph(f"{r}", styles["BodyText"]) for r in rounds] + + # 2. 
Transpose the stacks into rows + # We need to find the max height among all columns to normalize the row count + column_stacks = [build_stack_rows(data.get(r, []), col_width) for r in rounds] + max_rows = max(len(stack) for stack in column_stacks) + + table_data = [header] + + # Fill the table row by row + for i in range(max_rows): + row = [] + for stack in column_stacks: + if i < len(stack): + row.append(stack[i]) + else: + row.append("") # Empty cell if this column has fewer items + table_data.append(row) + + tbl = LongTable( + table_data, + colWidths=[col_width] * num_cols, + hAlign="LEFT", + splitByRow=True, # This allows the table to break across pages between rows + ) + + tbl_style = [ + ("GRID", (0, 0), (-1, -1), 0.25, colors.grey), + ("BACKGROUND", (0, 0), (-1, 0), colors.whitesmoke), + ("ALIGN", (0, 0), (-1, 0), "CENTER"), + ("VALIGN", (0, 1), (-1, -1), "TOP"), + ("TOPPADDING", (0, 0), (-1, -1), 0), # Tighten padding for large lists + ("BOTTOMPADDING", (0, 0), (-1, -1), 0), + ("FONTSIZE", (0, 0), (-1, 0), ROUND_HEADER_FONT), + ] + + if highlight: + tbl_style.append(("BOX", (0, 0), (-1, -1), 2, colors.red)) + + tbl.setStyle(TableStyle(tbl_style)) + + flow.append(tbl) + flow.append(Spacer(1, 8)) + return flow + + +# ----------------------------- +# PDF builder +# ----------------------------- + +def build_nonce_alarms_timeline_pdf(alarm_data: dict[str, dict[int, dict[int, dict[int, list[Any]]]]], + outname="nonce_alarms.pdf"): + doc = SimpleDocTemplate( + outname, + pagesize=A4, + leftMargin=LEFT_MARGIN, + rightMargin=RIGHT_MARGIN, + topMargin=TOP_MARGIN, + bottomMargin=BOTTOM_MARGIN, + ) + + usable_width = PAGE_WIDTH - LEFT_MARGIN - RIGHT_MARGIN + MAX_H = PAGE_HEIGHT - TOP_MARGIN - BOTTOM_MARGIN + + styles = getSampleStyleSheet() + story = [] + story.append(Paragraph("Nonce Alarms Report", styles["Title"])) + story.append(Spacer(1, 10)) + + current_h = 0 + first_page = True + for alarm, shards_data in alarm_data.items(): + if not shards_data: + continue + story.append(Paragraph(f"Alarm: {alarm}", styles["Heading2"])) + story.append(Spacer(1, 6)) + current_h += 36 # approx height of heading + spacer + + for shard_id, shard_dict in shards_data.items(): + for nonce, rdata in sorted(shard_dict.items()): + # height estimate based on max stack height + max_stack = max((len(v) for v in rdata.values()), default=1) + h_needed = SECTION_BASE_HEIGHT + max(0, max_stack - 2) * EXTRA_LINE_HEIGHT + + effective_page_height = MAX_H - (TITLE_HEIGHT if first_page else 0) + + if current_h + h_needed > effective_page_height: + story.append(PageBreak()) + current_h = 0 + first_page = False + + round_list = list(rdata.keys()) + story.extend(build_nonce_section(shard_id, nonce, round_list, rdata, usable_width)) + current_h += h_needed + + doc.build(story) + + +# ----------------------------- Example input data ------------------------------ +input_data = { + 0: { + 1: { + 100: [('origin_final', 'Shard 0', COLORS_MAPPING[Colors.origin_final])], + 101: [('origin_notarized', 'Shard 0', COLORS_MAPPING[Colors.meta_origin_committed])], + 102: [('dest_proposed', 'Shard 1', COLORS_MAPPING[Colors.dest_proposed]), ('dest_final', 'Shard 2', COLORS_MAPPING[Colors.dest_final])], + 103: [('dest_partial', 'Shard 1', COLORS_MAPPING[Colors.dest_partial_executed]), ('dest_notarized', 'Shard 2', COLORS_MAPPING[Colors.meta_dest_committed])], + 104: [('dest_final', 'Shard 1', COLORS_MAPPING[Colors.dest_final])], + 105: [('dest_notarized', 'Shard 1', COLORS_MAPPING[Colors.meta_dest_committed])], + }, + 2: { + 101: 
[('origin_proposed', 'Shard 0', COLORS_MAPPING[Colors.origin_proposed])], + 103: [('origin_final', 'Shard 0', COLORS_MAPPING[Colors.origin_final])], + 104: [('dest_final', 'Shard 2', COLORS_MAPPING[Colors.dest_final]), ('origin_notarized', 'Shard 0', COLORS_MAPPING[Colors.meta_origin_committed])], + 105: [('dest_notarized', 'Shard 2', COLORS_MAPPING[Colors.meta_dest_committed])], + } + }, + 1: { + 1: { + 101: [('N1', 'S1', COLORS_MAPPING[Colors.origin_final])], + 102: [('N1', 'S1', COLORS_MAPPING[Colors.meta_origin_committed])], + 103: [('N1', 'S0', COLORS_MAPPING[Colors.dest_proposed]), ('N1', 'S2', COLORS_MAPPING[Colors.dest_final])], + 104: [('N1', 'S0', COLORS_MAPPING[Colors.dest_partial_executed]), ('N1', 'S2', COLORS_MAPPING[Colors.meta_dest_committed])], + 105: [('N1', 'S0', COLORS_MAPPING[Colors.dest_final])], + 106: [('N1', 'S0', COLORS_MAPPING[Colors.meta_dest_committed])], + }, + 2: { + 102: [('N2', 'S1', COLORS_MAPPING[Colors.origin_partial_executed])], + 104: [('N2', 'S1', COLORS_MAPPING[Colors.origin_final])], + 105: [('N2', 'S2', COLORS_MAPPING[Colors.dest_final]), ('N2', 'S1', COLORS_MAPPING[Colors.meta_origin_committed])], + 106: [('N2', 'S2', COLORS_MAPPING[Colors.meta_dest_committed])], + }, + }, + 2: { + 1: { + 100: [('N1', 'S2', COLORS_MAPPING[Colors.origin_final])], + 101: [('N1', 'S2', COLORS_MAPPING[Colors.meta_origin_committed])], + 102: [('N1', 'S0', COLORS_MAPPING[Colors.dest_final]), ('N1', 'S1', COLORS_MAPPING[Colors.dest_final])], + 103: [('N1', 'S0', COLORS_MAPPING[Colors.meta_dest_committed]), ('N1', 'S1', COLORS_MAPPING[Colors.meta_dest_committed])], + }, + 2: { + 101: [('N2', 'S2', COLORS_MAPPING[Colors.origin_final])], + 102: [('N2', 'S2', COLORS_MAPPING[Colors.meta_origin_committed])], + 103: [('N2', 'S0', COLORS_MAPPING[Colors.dest_final])], + 104: [('N2', 'S0', COLORS_MAPPING[Colors.meta_dest_committed]), ('N2', 'S1', COLORS_MAPPING[Colors.dest_final])], + 105: [('N2', 'S1', COLORS_MAPPING[Colors.meta_dest_committed])], + }, + 3: { + 103: [('N3', 'S2', COLORS_MAPPING[Colors.origin_final])], + 104: [('N3', 'S2', COLORS_MAPPING[Colors.meta_origin_committed])], + 105: [('N3', 'S1', COLORS_MAPPING[Colors.dest_final])], + 106: [('N3', 'S1', COLORS_MAPPING[Colors.dest_final])], + 107: [('N3', 'S1', COLORS_MAPPING[Colors.meta_dest_committed])], + }, + }, + 4294967295: { + 1: { + 100: [('N1', 'M', COLORS_MAPPING[Colors.origin_final])], + 103: [('N1', 'M', COLORS_MAPPING[Colors.meta_origin_committed])], + 104: [('N1', 'S0', COLORS_MAPPING[Colors.dest_final]), ('N1', 'S1', COLORS_MAPPING[Colors.dest_final])], + 105: [('N1', 'S0', COLORS_MAPPING[Colors.meta_dest_committed]), ('N1', 'S1', COLORS_MAPPING[Colors.meta_dest_committed]), ('N1', 'S2', COLORS_MAPPING[Colors.dest_final])], + 106: [('N1', 'S2', COLORS_MAPPING[Colors.meta_dest_committed])], + } + } + +} + + +def main(): + + parser = argparse.ArgumentParser(description="Nonce timeline alarms report generator") + + group = parser.add_mutually_exclusive_group(required=True) + group.add_argument("--path", type=str, help="Path to folder containing run output") + group.add_argument("--run-name", type=str, help="Name of the run inside ./Reports/") + + args = parser.parse_args() + + # resolve final folder path + if args.path: + base_path = args.path + else: + base_path = os.path.join("Reports", args.run_name) + + # verify base folder exists + if not os.path.isdir(base_path): + print(f"Error: folder not found: {base_path}") + sys.exit(1) + + # verify expected files exist + shard_ids = [0, 1, 2, 4294967295] + missing 
= [] + + for shard in shard_ids: + p = os.path.join(base_path, "Shards", f"{shard}_report.json") + if not os.path.isfile(p): + missing.append(p) + + miniblocks_path = os.path.join(base_path, "Miniblocks", "miniblocks_report.json") + if not os.path.isfile(miniblocks_path): + missing.append(miniblocks_path) + + if missing: + print("Error: missing required files:") + for m in missing: + print(" -", m) + sys.exit(1) + + # load JSONs + headers = ShardData() + + for shard in shard_ids: + with open(os.path.join(base_path, "Shards", f"{shard}_report.json")) as f: + data = json.load(f) + headers.parsed_headers[shard] = HeaderData() + headers.parsed_headers[shard].header_dictionary = data["shards"] + + with open(miniblocks_path) as f: + data = json.load(f) + headers.miniblocks = data["miniblocks"] + + # process + input_data = MiniblockData(headers.miniblocks).get_data_for_header_alarms_report() + + # output path + out_folder = os.path.join(base_path, "NonceAlarms") + os.makedirs(out_folder, exist_ok=True) + + for epoch in sorted(input_data.keys()): + outfile = os.path.join(out_folder, f"nonce_alarms_report_{epoch}.pdf") + build_nonce_alarms_timeline_pdf(input_data[epoch], outname=outfile) + print(f"Nonce alarms report for Epoch {epoch} generated: {outfile}") + + +if __name__ == "__main__": + main() diff --git a/multiversx_cross_shard_analysis/headers_timeline_report.py b/multiversx_cross_shard_analysis/headers_timeline_report.py new file mode 100644 index 0000000..b35dddc --- /dev/null +++ b/multiversx_cross_shard_analysis/headers_timeline_report.py @@ -0,0 +1,334 @@ +import argparse +import json +import os +import sys +from typing import Any + +from reportlab.graphics.shapes import Drawing, Rect, String +from reportlab.lib import colors +from reportlab.lib.pagesizes import A4 +from reportlab.lib.styles import getSampleStyleSheet +from reportlab.platypus import (Flowable, LongTable, PageBreak, Paragraph, + SimpleDocTemplate, Spacer, TableStyle) + +from multiversx_cross_shard_analysis.constants import COLORS_MAPPING, Colors +from multiversx_cross_shard_analysis.header_structures import (HeaderData, + ShardData) +from multiversx_cross_shard_analysis.miniblock_data import MiniblockData + +# ----------------------------- +# CONFIG (mirrors miniblock report) +# ----------------------------- + +PAGE_WIDTH, PAGE_HEIGHT = A4 +LEFT_MARGIN = RIGHT_MARGIN = 20 +TOP_MARGIN = BOTTOM_MARGIN = 20 + +RECT_H = 20 +RECT_PADDING_X = 4 +ROUND_HEADER_FONT = 7 +RECT_LABEL_FONT = 8 +RECT_INFO_FONT = 8 + +SECTION_BASE_HEIGHT = 110 # same idea as miniblock +EXTRA_LINE_HEIGHT = 18 # additional rows per stack +TITLE_HEIGHT = 60 + + +# ----------------------------- +# build stacked rectangles (same as miniblock version) +# ----------------------------- + +def build_stack_rows(items: list[tuple[str, str, colors.Color]], col_width: float) -> list[Drawing]: + """ + Instead of one giant Drawing, we return a list of small ones. + Each drawing represents one row in the vertical stack. 
+ """ + row_drawings = [] + + if len(items) == 0: + # Create a single "no data" row + d = Drawing(col_width, RECT_H) + rect_w = max(2, col_width - RECT_PADDING_X * 2) - 4 + d.add(Rect(0, 2, rect_w, 12, fillColor=colors.whitesmoke, strokeColor=colors.grey)) # type: ignore + d.add(String(RECT_PADDING_X + 2, 6, "no data", fontSize=RECT_LABEL_FONT)) + row_drawings.append(d) + return row_drawings + + for label, info, col in items: + # Create a small drawing for just this one item + d = Drawing(col_width, RECT_H) + rect_w = max(2, col_width - RECT_PADDING_X * 2) - 4 + + d.add(Rect(0, 2, rect_w, RECT_H - 4, fillColor=col, strokeColor=colors.black)) # type: ignore + + text_x = RECT_PADDING_X + 3 + d.add(String(text_x, 12, label, fontSize=RECT_LABEL_FONT)) + d.add(String(text_x, 4, info, fontSize=RECT_INFO_FONT)) + row_drawings.append(d) + + return row_drawings + +# ----------------------------- +# check for round gaps +# ----------------------------- + + +def has_round_gap(rounds: list[int]) -> bool: + if len(rounds) < 2: + return False + rounds_sorted = sorted(rounds) + for a, b in zip(rounds_sorted, rounds_sorted[1:]): + if b != a + 1: + return True + return False + + +# ----------------------------- +# build section for one nonce +# ----------------------------- + +def build_nonce_section(shard_id: int, nonce: int, rounds: list[int], data: dict[int, list[Any]], + usable_width: float, highlight: bool = False) -> list[Flowable]: + + flow = [] + styles = getSampleStyleSheet() + + flow.append(Paragraph(f"Shard {shard_id} — Nonce {nonce}", styles["Heading3"])) + flow.append(Spacer(1, 4)) + + num_cols = len(rounds) + col_width = usable_width / max(1, num_cols) + + # 1. Build the Header Row + header = [Paragraph(f"{r}", styles["BodyText"]) for r in rounds] + + # 2. 
Transpose the stacks into rows + # We need to find the max height among all columns to normalize the row count + column_stacks = [build_stack_rows(data.get(r, []), col_width) for r in rounds] + max_rows = max(len(stack) for stack in column_stacks) + + table_data = [header] + + # Fill the table row by row + for i in range(max_rows): + row = [] + for stack in column_stacks: + if i < len(stack): + row.append(stack[i]) + else: + row.append("") # Empty cell if this column has fewer items + table_data.append(row) + + tbl = LongTable( + table_data, + colWidths=[col_width] * num_cols, + hAlign="LEFT", + splitByRow=True, # This allows the table to break across pages between rows + ) + + tbl_style = [ + ("GRID", (0, 0), (-1, -1), 0.25, colors.grey), + ("BACKGROUND", (0, 0), (-1, 0), colors.whitesmoke), + ("ALIGN", (0, 0), (-1, 0), "CENTER"), + ("VALIGN", (0, 1), (-1, -1), "TOP"), + ("TOPPADDING", (0, 0), (-1, -1), 0), # Tighten padding for large lists + ("BOTTOMPADDING", (0, 0), (-1, -1), 0), + ("FONTSIZE", (0, 0), (-1, 0), ROUND_HEADER_FONT), + ] + + if highlight: + tbl_style.append(("BOX", (0, 0), (-1, -1), 2, colors.red)) + + tbl.setStyle(TableStyle(tbl_style)) + + flow.append(tbl) + flow.append(Spacer(1, 8)) + return flow + + +# ----------------------------- +# PDF builder +# ----------------------------- + +def build_nonce_timeline_pdf(shards_data: dict[int, dict[int, dict[int, list[Any]]]], + nonce_alarms: dict[int, set[int]], + outname="nonce_timeline.pdf"): + doc = SimpleDocTemplate( + outname, + pagesize=A4, + leftMargin=LEFT_MARGIN, + rightMargin=RIGHT_MARGIN, + topMargin=TOP_MARGIN, + bottomMargin=BOTTOM_MARGIN, + ) + + usable_width = PAGE_WIDTH - LEFT_MARGIN - RIGHT_MARGIN + MAX_H = PAGE_HEIGHT - TOP_MARGIN - BOTTOM_MARGIN + + styles = getSampleStyleSheet() + story = [] + story.append(Paragraph("Nonce Timeline Report", styles["Title"])) + story.append(Spacer(1, 10)) + + current_h = 0 + first_page = True + + for shard_id, shard_dict in shards_data.items(): + for nonce, rdata in sorted(shard_dict.items()): + # height estimate based on max stack height + max_stack = max((len(v) for v in rdata.values()), default=1) + h_needed = SECTION_BASE_HEIGHT + max(0, max_stack - 2) * EXTRA_LINE_HEIGHT + + effective_page_height = MAX_H - (TITLE_HEIGHT if first_page else 0) + + if current_h + h_needed > effective_page_height: + story.append(PageBreak()) + current_h = 0 + first_page = False + + round_list = list(rdata.keys()) + alarm = nonce in nonce_alarms.get(shard_id, set()) + story.extend(build_nonce_section(shard_id, nonce, round_list, rdata, usable_width, alarm)) + current_h += h_needed + + doc.build(story) + + +# ----------------------------- Example input data ------------------------------ +input_data = { + 0: { + 1: { + 100: [('origin_final', 'Shard 0', COLORS_MAPPING[Colors.origin_final])], + 101: [('origin_notarized', 'Shard 0', COLORS_MAPPING[Colors.meta_origin_committed])], + 102: [('dest_proposed', 'Shard 1', COLORS_MAPPING[Colors.dest_proposed]), ('dest_final', 'Shard 2', COLORS_MAPPING[Colors.dest_final])], + 103: [('dest_partial', 'Shard 1', COLORS_MAPPING[Colors.dest_partial_executed]), ('dest_notarized', 'Shard 2', COLORS_MAPPING[Colors.meta_dest_committed])], + 104: [('dest_final', 'Shard 1', COLORS_MAPPING[Colors.dest_final])], + 105: [('dest_notarized', 'Shard 1', COLORS_MAPPING[Colors.meta_dest_committed])], + }, + 2: { + 101: [('origin_proposed', 'Shard 0', COLORS_MAPPING[Colors.origin_proposed])], + 103: [('origin_final', 'Shard 0', COLORS_MAPPING[Colors.origin_final])], + 104: 
[('dest_final', 'Shard 2', COLORS_MAPPING[Colors.dest_final]), ('origin_notarized', 'Shard 0', COLORS_MAPPING[Colors.meta_origin_committed])], + 105: [('dest_notarized', 'Shard 2', COLORS_MAPPING[Colors.meta_dest_committed])], + } + }, + 1: { + 1: { + 101: [('N1', 'S1', COLORS_MAPPING[Colors.origin_final])], + 102: [('N1', 'S1', COLORS_MAPPING[Colors.meta_origin_committed])], + 103: [('N1', 'S0', COLORS_MAPPING[Colors.dest_proposed]), ('N1', 'S2', COLORS_MAPPING[Colors.dest_final])], + 104: [('N1', 'S0', COLORS_MAPPING[Colors.dest_partial_executed]), ('N1', 'S2', COLORS_MAPPING[Colors.meta_dest_committed])], + 105: [('N1', 'S0', COLORS_MAPPING[Colors.dest_final])], + 106: [('N1', 'S0', COLORS_MAPPING[Colors.meta_dest_committed])], + }, + 2: { + 102: [('N2', 'S1', COLORS_MAPPING[Colors.origin_partial_executed])], + 104: [('N2', 'S1', COLORS_MAPPING[Colors.origin_final])], + 105: [('N2', 'S2', COLORS_MAPPING[Colors.dest_final]), ('N2', 'S1', COLORS_MAPPING[Colors.meta_origin_committed])], + 106: [('N2', 'S2', COLORS_MAPPING[Colors.meta_dest_committed])], + }, + }, + 2: { + 1: { + 100: [('N1', 'S2', COLORS_MAPPING[Colors.origin_final])], + 101: [('N1', 'S2', COLORS_MAPPING[Colors.meta_origin_committed])], + 102: [('N1', 'S0', COLORS_MAPPING[Colors.dest_final]), ('N1', 'S1', COLORS_MAPPING[Colors.dest_final])], + 103: [('N1', 'S0', COLORS_MAPPING[Colors.meta_dest_committed]), ('N1', 'S1', COLORS_MAPPING[Colors.meta_dest_committed])], + }, + 2: { + 101: [('N2', 'S2', COLORS_MAPPING[Colors.origin_final])], + 102: [('N2', 'S2', COLORS_MAPPING[Colors.meta_origin_committed])], + 103: [('N2', 'S0', COLORS_MAPPING[Colors.dest_final])], + 104: [('N2', 'S0', COLORS_MAPPING[Colors.meta_dest_committed]), ('N2', 'S1', COLORS_MAPPING[Colors.dest_final])], + 105: [('N2', 'S1', COLORS_MAPPING[Colors.meta_dest_committed])], + }, + 3: { + 103: [('N3', 'S2', COLORS_MAPPING[Colors.origin_final])], + 104: [('N3', 'S2', COLORS_MAPPING[Colors.meta_origin_committed])], + 105: [('N3', 'S1', COLORS_MAPPING[Colors.dest_final])], + 106: [('N3', 'S1', COLORS_MAPPING[Colors.dest_final])], + 107: [('N3', 'S1', COLORS_MAPPING[Colors.meta_dest_committed])], + }, + }, + 4294967295: { + 1: { + 100: [('N1', 'M', COLORS_MAPPING[Colors.origin_final])], + 103: [('N1', 'M', COLORS_MAPPING[Colors.meta_origin_committed])], + 104: [('N1', 'S0', COLORS_MAPPING[Colors.dest_final]), ('N1', 'S1', COLORS_MAPPING[Colors.dest_final])], + 105: [('N1', 'S0', COLORS_MAPPING[Colors.meta_dest_committed]), ('N1', 'S1', COLORS_MAPPING[Colors.meta_dest_committed]), ('N1', 'S2', COLORS_MAPPING[Colors.dest_final])], + 106: [('N1', 'S2', COLORS_MAPPING[Colors.meta_dest_committed])], + } + } + +} + + +def main(): + + parser = argparse.ArgumentParser(description="Nonce timeline report generator") + + group = parser.add_mutually_exclusive_group(required=True) + group.add_argument("--path", type=str, help="Path to folder containing run output") + group.add_argument("--run-name", type=str, help="Name of the run inside ./Reports/") + + args = parser.parse_args() + + # resolve final folder path + if args.path: + base_path = args.path + else: + base_path = os.path.join("Reports", args.run_name) + + # verify base folder exists + if not os.path.isdir(base_path): + print(f"Error: folder not found: {base_path}") + sys.exit(1) + + # verify expected files exist + shard_ids = [0, 1, 2, 4294967295] + missing = [] + + for shard in shard_ids: + p = os.path.join(base_path, "Shards", f"{shard}_report.json") + if not os.path.isfile(p): + missing.append(p) + + 
miniblocks_path = os.path.join(base_path, "Miniblocks", "miniblocks_report.json") + if not os.path.isfile(miniblocks_path): + missing.append(miniblocks_path) + + if missing: + print("Error: missing required files:") + for m in missing: + print(" -", m) + sys.exit(1) + + # load JSONs + headers = ShardData() + + for shard in shard_ids: + with open(os.path.join(base_path, "Shards", f"{shard}_report.json")) as f: + data = json.load(f) + headers.parsed_headers[shard] = HeaderData() + headers.parsed_headers[shard].header_dictionary = data["shards"] + + with open(miniblocks_path) as f: + data = json.load(f) + headers.miniblocks = data["miniblocks"] + + # process + input_data, nonce_alarms = MiniblockData(headers.miniblocks).get_data_for_header_report() + + # output path + out_folder = os.path.join(base_path, "NonceTimeline") + os.makedirs(out_folder, exist_ok=True) + + for epoch in sorted(input_data.keys()): + outfile = os.path.join(out_folder, f"nonce_timeline_report_{epoch}.pdf") + build_nonce_timeline_pdf(input_data[epoch], nonce_alarms, outname=outfile) + print(f"Nonce timeline report for Epoch {epoch} generated: {outfile}") + + +if __name__ == "__main__": + main() diff --git a/multiversx_cross_shard_analysis/issues.py b/multiversx_cross_shard_analysis/issues.py new file mode 100644 index 0000000..0d0c082 --- /dev/null +++ b/multiversx_cross_shard_analysis/issues.py @@ -0,0 +1,164 @@ +from enum import Enum +from typing import Any, Callable + +MAX_ROUND_GAP_ALLOWED = 1 +SUPERNOVA_ACTIVATION_EPOCH = 2 + + +class Issues(Enum): + MISSING_OR_DUPLICATE_DESTINATION = 'missing_or_duplicate_destination' + WRONG_PROCESSING_ORDER = 'wrong_processing_order' + GAP_BETWEEN_ROUNDS = 'gap_between_rounds' + + # Logic for: GAP_BETWEEN_ROUNDS + def check_gap_between_rounds(self, mb_info: dict[str, Any]) -> bool: + last_round = -1 + for _, mentioning_header in mb_info.get('mentioned', []): + if last_round == -1: + last_round = mentioning_header.get('round') + elif mentioning_header.get('round') - last_round > MAX_ROUND_GAP_ALLOWED: + return True + last_round = mentioning_header.get('round') + return False + + # Logic for: MISSING_DESTINATION + def check_missing_or_duplicate_destination(self, mb_info: dict[str, Any]) -> bool: + receiver = mb_info.get("receiverShardID") + sender = mb_info.get("senderShardID") + count = 0 + + for _, header in mb_info.get("mentioned", []): + if header.get("shard_id") == receiver and mb_info.get("type") in [0, 90]: + count += 1 + + is_dest_missing = count == 0 and mb_info.get("type") in [0, 90] + is_dest_duplicate = count > 4 and mb_info.get("type") in [0, 90] and receiver != sender and mb_info.get("first_seen_epoch", 0) >= SUPERNOVA_ACTIVATION_EPOCH + + return is_dest_missing or is_dest_duplicate + + # Logic for: WRONG_PROCESSING_ORDER + def check_wrong_order(self, mb_info: dict[str, Any]) -> bool: + max_phase = -1 + + for mtype, data in sorted(mb_info.get('mentioned', []), key=lambda x: x[1].get('round', 0)): + if 'exec' in mtype: + phase = 1 if 'origin' in mtype else 4 + elif 'meta' in mtype: + phase = 2 if 'origin' in mtype else 5 + else: + phase = 0 if 'origin' in mtype else 3 + + if phase < max_phase: + return True + max_phase = phase + + return False + + def run_check(self, issue_type: 'Issues', mb_info: dict[str, Any]) -> bool: + """Helper to route to the correct method.""" + check_map: dict[Issues, Callable] = { + Issues.MISSING_OR_DUPLICATE_DESTINATION: self.check_missing_or_duplicate_destination, + Issues.WRONG_PROCESSING_ORDER: self.check_wrong_order, + 
Issues.GAP_BETWEEN_ROUNDS: self.check_gap_between_rounds, + } + return check_map[issue_type](mb_info) + + +''' +Example miniblock structure after being processed and enriched: + { + "hash": "5db8a831cad452a5d85aa1b7aa033f864827d083f54fc5133f83e8e5d16a2dac", + "receiverShardID": 1, + "reserved": "209208", + "senderShardID": 0, + "txCount": 1043, + "type": 0, + "first_seen_round": 441, + "last_seen_round": 443, + "first_seen_epoch": 2, + "nonce": 440, + "mentioned": [ + [ + "origin_shard_proposed_headers_exec", + { + "nonce": 440, + "round": 441, + "epoch": 2, + "shard_id": 0, + "reserved": { + "ExecutionType": "Normal", + "State": "Final", + "IndexOfFirstTxProcessed": 0, + "IndexOfLastTxProcessed": 26 + } + } + ], + [ + "origin_shard_committed_headers_exec", + { + "nonce": 440, + "round": 441, + "epoch": 2, + "shard_id": 0, + "reserved": { + "ExecutionType": "Normal", + "State": "Final", + "IndexOfFirstTxProcessed": 0, + "IndexOfLastTxProcessed": 26 + } + } + ], + [ + "dest_shard_proposed_headers", + { + "nonce": 443, + "round": 443, + "epoch": 2, + "shard_id": 1, + "reserved": { + "ExecutionType": "Normal", + "State": "Proposed", + "IndexOfFirstTxProcessed": 0, + "IndexOfLastTxProcessed": 1042 + } + } + ], + [ + "dest_shard_committed_headers", + { + "nonce": 443, + "round": 443, + "epoch": 2, + "shard_id": 1, + "reserved": { + "ExecutionType": "Normal", + "State": "Proposed", + "IndexOfFirstTxProcessed": 0, + "IndexOfLastTxProcessed": 1042 + } + } + ], + [ + "meta_origin_shard_proposed_headers", + { + "nonce": 442, + "round": 442, + "epoch": 2, + "shard_id": 4294967295, + "reserved": {} + } + ], + [ + "meta_origin_shard_committed_headers", + { + "nonce": 442, + "round": 442, + "epoch": 2, + "shard_id": 4294967295, + "reserved": {} + } + ] + ] + } + +''' diff --git a/multiversx_cross_shard_analysis/miniblock_data.py b/multiversx_cross_shard_analysis/miniblock_data.py new file mode 100644 index 0000000..7113d6a --- /dev/null +++ b/multiversx_cross_shard_analysis/miniblock_data.py @@ -0,0 +1,356 @@ + +from typing import Any + +from multiversx_cross_shard_analysis.constants import (COLORS_MAPPING, + TYPE_NAMES, Colors) +from multiversx_cross_shard_analysis.decode_reserved import \ + get_default_decoded_data +from multiversx_cross_shard_analysis.issues import Issues + + +class MiniblockData: + + def __init__(self, miniblocks: dict[str, dict[str, Any]]): + self.miniblocks = miniblocks + self.verify_miniblocks() + + def verify_miniblocks(self) -> None: + for mb_hash, mb_info in self.miniblocks.items(): + mb_info['mentioned'] = sorted(mb_info.get('mentioned', []), key=lambda x: (x[1].get('epoch', 0), x[1].get('round', 0))) + mentioning_header = mb_info['mentioned'][0][1] if mb_info['mentioned'] else None + if mentioning_header: + mb_info['first_seen_round'] = mentioning_header.get('round') + mb_info['first_seen_epoch'] = mentioning_header.get('epoch') + mb_info['nonce'] = mentioning_header.get('nonce') + mb_info['senderShardID'] = mentioning_header.get('shard_id') + mb_info['alarms'] = [] + + # Perform the configurable checks + for issue in Issues: + if issue.run_check(issue, mb_info): + mb_info['alarms'].append(issue.name) + + # Set the general alarm flag if any issues were found + mb_info['hasAlarm'] = len(mb_info['alarms']) > 0 + + def get_color_for_state(self, mention_type: str, tx_count: int, header: dict[str, Any]) -> Colors: + reserved = header.get('reserved', {}) + if reserved == {}: + reserved = get_default_decoded_data(tx_count=tx_count) + if "meta" in mention_type: + if 'exec' in 
mention_type: + color = Colors.meta_origin_exec_committed if mention_type.startswith('meta_origin') else Colors.meta_dest_exec_committed + else: + color = Colors.meta_origin_committed if mention_type.startswith('meta_origin') else Colors.meta_dest_committed + else: + if 'exec' in mention_type: + color = Colors.origin_exec_final if mention_type.startswith('origin') else Colors.dest_exec_final + else: + color = Colors.origin_final if mention_type.startswith('origin') else Colors.dest_final + else: + # execution_type = header.get('reserved', {}).get('ExecutionType', '') + state = header.get('reserved', {}).get('State', '') + if 'exec' in mention_type: + if state == 'Proposed': + color = Colors.origin_exec_proposed if mention_type.startswith('origin') else Colors.dest_exec_proposed + elif state == 'PartialExecuted': + color = Colors.origin_exec_partial_executed if mention_type.startswith('origin') else Colors.dest_exec_partial_executed + else: + color = Colors.origin_exec_final if mention_type.startswith('origin') else Colors.dest_exec_final + else: + if state == 'Proposed': + color = Colors.origin_proposed if mention_type.startswith('origin') else Colors.dest_proposed + elif state == 'PartialExecuted': + color = Colors.origin_partial_executed if mention_type.startswith('origin') else Colors.dest_partial_executed + else: + color = Colors.origin_final if mention_type.startswith('origin') else Colors.dest_final + return color + + def get_data_for_round_report(self) -> dict[str, Any]: + report = {} + for mb_hash, mb_info in self.miniblocks.items(): + for mention_type, header in mb_info.get('mentioned', []): + if "proposed" in mention_type: + continue + + epoch = header.get('epoch') + if epoch not in report: + report[epoch] = {} + round_number = header.get('round') + if round_number not in report[epoch]: + report[epoch][round_number] = {} + shard = header.get('shard_id') + if shard not in report[epoch][round_number]: + report[epoch][round_number][shard] = [] + + color = COLORS_MAPPING[self.get_color_for_state(mention_type, mb_info['txCount'], header)] + report[epoch][round_number][shard].append((mb_hash, color)) + return report + + def get_data_for_detail_report(self) -> dict[str, list[dict[str, Any]]]: + report = {} + for mb_hash, mb_info in self.miniblocks.items(): + origin_epoch = None + + mb_data = { + "hash": mb_hash, + "first_seen_round": None, + "last_seen_round": None, + "receiverShardID": mb_info['receiverShardID'], + "senderShardID": mb_info['senderShardID'], + "txCount": mb_info['txCount'], + "type": mb_info['type'], + "hasAlarm": mb_info['hasAlarm'], + "mentioned": {}, + } + for mention_type, header in mb_info.get('mentioned', []): + epoch = header.get('epoch') + if epoch is not None and (origin_epoch is None or epoch < origin_epoch): + origin_epoch = epoch + round_number = header.get('round') + if mb_data['first_seen_round'] is None or round_number < mb_data['first_seen_round']: + mb_data['first_seen_round'] = round_number + if mb_data['last_seen_round'] is None or round_number > mb_data['last_seen_round']: + mb_data['last_seen_round'] = round_number + if round_number not in mb_data['mentioned']: + mb_data['mentioned'][round_number] = [] + + color = COLORS_MAPPING[self.get_color_for_state(mention_type, mb_info['txCount'], header)] + reserved = header.get('reserved') + if reserved == {}: + reserved = get_default_decoded_data(tx_count=mb_info['txCount']) + mb_data['mentioned'][round_number].append((mention_type, f"txs 
{reserved['IndexOfFirstTxProcessed']}–{reserved['IndexOfLastTxProcessed']} / {mb_info['txCount']}", color)) + + if origin_epoch is None: + print(f"Warning: origin_epoch not found for miniblock {mb_hash}") + continue + if origin_epoch not in report: + report[origin_epoch] = [] + report[origin_epoch].append(mb_data) + + for epoch, mb_list in report.items(): + mb_list.sort(key=lambda x: x['first_seen_round']) + return report + + def get_data_for_header_report(self) -> tuple[dict[int, dict[int, Any]], dict[int, set[int]]]: + report: dict[int, dict[int, Any]] = {} + + nonce_alarms: dict[int, set[int]] = {} + for mb_hash, mb_info in self.miniblocks.items(): + nonce = mb_info['nonce'] + shard_id = mb_info['senderShardID'] + epoch = mb_info['first_seen_epoch'] + hasAlarm = mb_info['hasAlarm'] + if hasAlarm: + nonce_alarms.setdefault(shard_id, set()).add(nonce) + + for mention_type, header in mb_info.get('mentioned', []): + if "proposed" in mention_type: + continue + + if epoch not in report: + report[epoch] = {} + + if shard_id not in report[epoch]: + report[epoch][shard_id] = {} + + if nonce not in report[epoch][shard_id]: + report[epoch][shard_id][nonce] = {} + + round_number = header.get('round') + if round_number not in report[epoch][shard_id][nonce]: + report[epoch][shard_id][nonce][round_number] = [] + + color = COLORS_MAPPING[self.get_color_for_state(mention_type, mb_info['txCount'], header)] + label = f'Shard {header["shard_id"]}' if header["shard_id"] != 4294967295 else "MetaShard" + + if mb_info['type'] != 0: + label += f' ({TYPE_NAMES[mb_info["type"]]})' + else: + label += f' ({mb_info["senderShardID"]} -> {mb_info["receiverShardID"]})' + + report[epoch][shard_id][nonce][round_number].append((label, mb_hash[:15] + '...', color)) + + return sort_any(report), nonce_alarms + + def get_data_for_header_alarms_report(self) -> dict[int, Any]: + report: dict[int, Any] = {} + nonce_alarms: dict[int, dict[int, set]] = {} + + seen_miniblocks: set[str] = set() + + for mb_hash, mb_info in [(hash, miniblock) for hash, miniblock in self.miniblocks.items() if miniblock['hasAlarm']]: + nonce = mb_info['nonce'] + shard_id = mb_info['senderShardID'] + epoch = mb_info['first_seen_epoch'] + + hasAlarm = mb_info['hasAlarm'] + alarms = mb_info['alarms'] + if hasAlarm: + shard_map = nonce_alarms.setdefault(shard_id, {}) + shard_map.setdefault(nonce, set()).update(alarms) + + for mention_type, header in mb_info.get('mentioned', []): + if "proposed" in mention_type: + continue + + # prepare epoch level + if epoch not in report: + report[epoch] = {} + for issue in Issues: + report[epoch][issue.name] = {} + + color = COLORS_MAPPING[self.get_color_for_state(mention_type, mb_info['txCount'], header)] + label = f'Shard {header["shard_id"]}' if header["shard_id"] != 4294967295 else "MetaShard" + + if mb_info['type'] != 0: + label += f' ({TYPE_NAMES[mb_info["type"]]})' + else: + label += f' ({mb_info["senderShardID"]} -> {mb_info["receiverShardID"]})' + + for issue in mb_info['alarms']: + if shard_id not in report[epoch][issue]: + report[epoch][issue][shard_id] = {} + + if nonce not in report[epoch][issue][shard_id]: + report[epoch][issue][shard_id][nonce] = {} + + round_number = header.get('round') + if round_number not in report[epoch][issue][shard_id][nonce]: + report[epoch][issue][shard_id][nonce][round_number] = [] + + report[epoch][issue][shard_id][nonce][round_number].append((label, mb_hash[:15] + '...', color)) + seen_miniblocks.add(mb_hash) + + for mb_hash in [item for item in self.miniblocks.keys() if item not in seen_miniblocks and self.miniblocks[item]['nonce'] in nonce_alarms.get(self.miniblocks[item]['senderShardID'], {})]: + mb_info = self.miniblocks[mb_hash] + nonce = mb_info['nonce'] + shard_id = mb_info['senderShardID'] + epoch = mb_info['first_seen_epoch'] + + for mention_type, header in mb_info.get('mentioned', []): + if "proposed" in mention_type: + continue + + # prepare epoch level + if epoch not in report: + report[epoch] = {} + for issue in Issues: + report[epoch][issue.name] = {} + + color = COLORS_MAPPING[self.get_color_for_state(mention_type, mb_info['txCount'], header)] + label = f'Shard {header["shard_id"]}' if header["shard_id"] != 4294967295 else "MetaShard" + + if mb_info['type'] != 0: + label += f' ({TYPE_NAMES[mb_info["type"]]})' + else: + label += f' ({mb_info["senderShardID"]} -> {mb_info["receiverShardID"]})' + + for issue in nonce_alarms[shard_id][nonce]: + if shard_id not in report[epoch][issue]: + report[epoch][issue][shard_id] = {} + + if nonce not in report[epoch][issue][shard_id]: + report[epoch][issue][shard_id][nonce] = {} + + round_number = header.get('round') + if round_number not in report[epoch][issue][shard_id][nonce]: + report[epoch][issue][shard_id][nonce][round_number] = [] + + report[epoch][issue][shard_id][nonce][round_number].append((label, mb_hash[:15] + '...', color)) + + return sort_any(report) + + +def sort_report(report: dict[int, dict[int, Any]]) -> dict[int, dict[int, Any]]: + out: dict[int, dict[int, Any]] = {} + + for epoch in sorted(report.keys()): + out[epoch] = {} + + # meta shard (4294967295) last + shard_ids = sorted( + report[epoch].keys(), + key=lambda s: (s == 4294967295, s), + ) + + for shard_id in shard_ids: + out[epoch][shard_id] = {} + + for nonce in sorted(report[epoch][shard_id].keys()): + rounds = report[epoch][shard_id][nonce] + + out[epoch][shard_id][nonce] = { + r: rounds[r] + for r in sorted(rounds.keys()) + } + + return out + + +def sort_report1(report: dict[int, dict[int, Any]]) -> dict[int, dict[int, Any]]: + out: dict[int, dict[int, Any]] = {} + + for epoch in sorted(report.keys()): + out[epoch] = {} + + for issue in sorted(report[epoch].keys()): + out[epoch][issue] = {} + # meta shard (4294967295) last + shard_ids = sorted( + report[epoch][issue].keys(), + key=lambda s: (s == 4294967295, s), + ) + + for shard_id in shard_ids: + out[epoch][issue][shard_id] = {} + + for nonce in sorted(report[epoch][issue][shard_id].keys()): + rounds = report[epoch][issue][shard_id][nonce] + + out[epoch][issue][shard_id][nonce] = { + r: rounds[r] + for r in sorted(rounds.keys()) + } + + return out + + +META_SHARD_ID = 4294967295 + + +def sort_any(data: Any) -> Any: + """ + Recursively sorts dictionaries and lists. + - Dictionaries: Sorted by keys (Meta Shard always last). + - Lists: Elements are recursively sorted. + - Others: Returned as is. 
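+ Example (illustrative): sort_any({4294967295: [2, 1], 0: [3, 1]}) returns {0: [1, 3], 4294967295: [1, 2]} - shard keys ascending with the meta shard key last, inner lists sorted. 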
+ """ + # Case 1: It's a Dictionary + if isinstance(data, dict): + # Determine the sorted order of keys + sorted_keys = sorted( + data.keys(), + key=lambda k: ( + # Rule: If key is the Meta Shard ID, put it last + (k == META_SHARD_ID) if isinstance(k, int) else False, + # Otherwise, sort naturally by value/string + k + ) + ) + # Rebuild dictionary recursively + return {k: sort_any(data[k]) for k in sorted_keys} + + # Case 2: It's a List/Array + elif isinstance(data, list): + # Sort each item inside the list first + processed_list = [sort_any(item) for item in data] + try: + # Try to sort the list itself if elements are comparable + return sorted(processed_list) + except TypeError: + # If elements are non-comparable (e.g., list of dicts), return as is + return processed_list + + # Case 3: Primitive types (int, str, bool, None) + return data diff --git a/multiversx_cross_shard_analysis/miniblocks_round_report.py b/multiversx_cross_shard_analysis/miniblocks_round_report.py new file mode 100644 index 0000000..b08dca7 --- /dev/null +++ b/multiversx_cross_shard_analysis/miniblocks_round_report.py @@ -0,0 +1,227 @@ +import argparse +import json +import os +import sys +from typing import Any + +from reportlab.graphics.shapes import Drawing, Rect, String +from reportlab.lib import colors +from reportlab.lib.pagesizes import A4 +from reportlab.lib.styles import getSampleStyleSheet +from reportlab.platypus import (Flowable, Paragraph, SimpleDocTemplate, Spacer, + Table, TableStyle) + +from multiversx_cross_shard_analysis.constants import COLORS_MAPPING +from multiversx_cross_shard_analysis.miniblock_data import MiniblockData + +# ---------------------------------------- +# legend +# ---------------------------------------- + + +def legend_box(color: colors.Color) -> Drawing: + d = Drawing(8, 8) + d.add(Rect(0, 0, 8, 8, fillColor=color, strokeColor=colors.black)) # type: ignore + return d + + +def build_legend(): + # turn dict into list of (label, color) + items = list(COLORS_MAPPING.items()) + + # 3 columns grid + cols = 3 + rows = [] + row = [] + + for i, (label, color) in enumerate(items): + row.append([legend_box(color), label]) + if len(row) == cols: + rows.append(row) + row = [] + + # leftover + if row: + rows.append(row) + + # flatten structure for Table + flat_rows = [] + for r in rows: + flat = [] + for (box, label) in r: + flat.append(box) + flat.append(label) + flat_rows.append(flat) + + # widths: box col, text col, box col, text col, etc. 
+ col_widths = [] + for _ in range(cols): + col_widths.extend([10, 140]) + + tbl = Table(flat_rows, colWidths=col_widths) + tbl.setStyle(TableStyle([ + ("VALIGN", (0, 0), (-1, -1), "MIDDLE"), + ("LEFTPADDING", (0, 0), (-1, -1), 1), + ("RIGHTPADDING", (0, 0), (-1, -1), 2), + ("TOPPADDING", (0, 0), (-1, -1), 1), + ("BOTTOMPADDING", (0, 0), (-1, -1), 1), + ])) + + return tbl + + +# ---------------------------------------- +# miniblock graphics (stack of boxes) +# ---------------------------------------- + + +def miniblock_box(text: str, stage_color: colors.Color) -> Drawing: + d = Drawing(120, 18) + d.add(Rect(0, 0, 120, 18, fillColor=stage_color, strokeColor=colors.black)) # type: ignore + d.add(String(3, 5, text, fontSize=6, fillColor=colors.black)) + return d + + +def stacked_miniblocks(miniblocks: list[tuple[str, colors.Color]], shard: int | None = None) -> Drawing: + height = 20 * (len(miniblocks) + 1 if shard is not None else len(miniblocks)) + d = Drawing(120, height) + y = height - 20 + + # header rectangle (same size, no fill) + if shard is not None: + d.add(Rect(0, y, 120, 18, fillColor=None, strokeColor=colors.black)) # type: ignore + d.add(String(3, y + 5, f"Shard {shard}", fontSize=6, fontName="Helvetica-Bold")) + y -= 20 + + # miniblocks + for (h, color) in miniblocks: + d.add(Rect(0, y, 120, 18, fillColor=color, strokeColor=colors.black)) # type: ignore + d.add(String(3, y + 5, h[:36] + "...", fontSize=6)) + y -= 20 + + return d + +# ---------------------------------------- +# horizontal layout helper +# ---------------------------------------- + + +class HFlowable(Flowable): + def __init__(self, flowables: list[Flowable], space=6): + super().__init__() + self.flowables = flowables + self.space = space + + def wrap(self, aW: float, aH: float) -> tuple[float, float]: + w, h = 0, 0 + for fl in self.flowables: + fw, fh = fl.wrap(aW, aH) + w += fw + self.space + h = max(h, fh) + self.width, self.height = w, h + return w, h + + def draw(self): + x = 0 + for fl in self.flowables: + fl.wrapOn(self.canv, 0, 0) + fl.drawOn(self.canv, x, 0) + x += fl.width + self.space + + +# ---------------------------------------- +# build report for one epoch +# ---------------------------------------- + +def build_report(epoch: int, rounds_data: dict[int, Any], shards: list[int], outname: str): + + doc = SimpleDocTemplate( + outname, + pagesize=A4, + leftMargin=20, rightMargin=20, topMargin=20, bottomMargin=20 + ) + + story = [] + styles = getSampleStyleSheet() + + # title + story.append(Paragraph(f"Miniblock Shards Report — Epoch {epoch}", styles["Title"])) + story.append(Spacer(1, 8)) + story.append(build_legend()) + story.append(Spacer(1, 12)) + + for rnd, shard_map in rounds_data.items(): + # round header + story.append(Paragraph(f"Round {rnd}", styles["Heading3"])) + story.append(Spacer(1, 6)) + + # for each row: we need max miniblocks across shards + max_rows = max(len(shard_map.get(s, [])) for s in shards) + + for i in range(max_rows): + row_flowables = [] + + for shard in shards: + mbs = shard_map.get(shard, []) + if i < len(mbs): + row_flowables.append(stacked_miniblocks([mbs[i]], shard if i == 0 else None)) + else: + # empty placeholder to keep columns aligned + row_flowables.append(Spacer(120, 20)) + + # add horizontal row + story.append(HFlowable(row_flowables, space=12)) + + story.append(Spacer(1, 20)) + + doc.build(story) + + +# ---------------------------------------- +# main +# ---------------------------------------- +def main(): + parser = 
argparse.ArgumentParser(description="Miniblock shards timeline report") + group = parser.add_mutually_exclusive_group(required=True) + group.add_argument("--path", type=str, help="Path to run folder") + group.add_argument("--run-name", type=str, help="Name of the folder under ./Reports/") + + args = parser.parse_args() + + # resolve base path + if args.path: + base_path = args.path + else: + base_path = os.path.join("Reports", args.run_name) + + if not os.path.isdir(base_path): + print(f"Error: folder not found: {base_path}") + sys.exit(1) + + miniblocks_path = os.path.join(base_path, "Miniblocks", "miniblocks_report.json") + if not os.path.isfile(miniblocks_path): + print("Error: missing required file:") + print(" -", miniblocks_path) + sys.exit(1) + + # load JSON + with open(miniblocks_path, "r") as f: + data = json.load(f) + + mb_data = MiniblockData(data["miniblocks"]).get_data_for_round_report() + + # output folder + out_folder = os.path.join(base_path, "MiniblocksShardTimeline") + os.makedirs(out_folder, exist_ok=True) + + # generate PDFs per epoch + for epoch in sorted(mb_data.keys()): + print(f"Epoch: {epoch}") + report_dict = mb_data[epoch] + outfile = os.path.join(out_folder, f"shards_timeline_report_{epoch}.pdf") + build_report(int(epoch), report_dict, shards=[0, 1, 2, 4294967295], outname=outfile) + print("→", outfile) + + +if __name__ == "__main__": + main() diff --git a/multiversx_cross_shard_analysis/miniblocks_timeline_report.py b/multiversx_cross_shard_analysis/miniblocks_timeline_report.py new file mode 100644 index 0000000..96da12f --- /dev/null +++ b/multiversx_cross_shard_analysis/miniblocks_timeline_report.py @@ -0,0 +1,286 @@ +""" +miniblocks_timeline_report.py + +Produces miniblock timeline report PDF: +- multiple miniblocks per page +- each miniblock: subtitle + full hash + meta info +- timeline table: columns = rounds (including gaps), each column contains stacked colored rectangles for mentions +- colors: precomputed per mention from the mention type and the decoded reserved field +""" +import argparse +import json +import os +import sys +from typing import Any + +from reportlab.graphics.shapes import Drawing, Rect, String +from reportlab.lib import colors +from reportlab.lib.pagesizes import A4 +from reportlab.lib.styles import getSampleStyleSheet +from reportlab.platypus import (PageBreak, Paragraph, SimpleDocTemplate, + Spacer, Table, TableStyle) +from reportlab.platypus.flowables import Flowable + +from multiversx_cross_shard_analysis.constants import TYPE_NAMES +from multiversx_cross_shard_analysis.miniblock_data import MiniblockData + +# ----------------------------- +# CONFIG +# ----------------------------- + +PAGE_WIDTH, PAGE_HEIGHT = A4 +LEFT_MARGIN = RIGHT_MARGIN = 20 +TOP_MARGIN = BOTTOM_MARGIN = 20 + +MINIBLOCKS_PER_PAGE = 6 + +ROUND_HEADER_FONT = 7 +RECT_LABEL_FONT = 8 +RECT_INFO_FONT = 8 + +# rectangle drawing dimensions +RECT_H = 20 +RECT_PADDING_X = 4 + +# ----------------------------- +# small flowable for a left-aligned rectangle inside a table cell +# ----------------------------- + + +class RectCell(Flowable): + '''not used currently, included for further development''' + + def __init__(self, label: str, info: str, color: colors.Color, width: float, height: float = 14, padding: float = 2): + super().__init__() + self.label = label + self.info = info + self.color = color + self.width = width + self.height = height + self.padding = padding + + def wrap(self, aW: float, aH: float) -> tuple[float, float]: + # force rect to match column width, never bigger + return 
self.width, self.height * 2 + + def draw(self): + c = self.canv + + c.setFillColor(self.color) + c.rect(0, 0, self.width, self.height * 2, fill=1, stroke=0) + + c.setFillColor(colors.black) + c.setFont("Helvetica", 7) + + # label line + c.drawString(self.padding, self.height + 1, self.label) + # info line + c.drawString(self.padding, 2, self.info) + + +# ----------------------------- +# build stacked drawing for one round +# ----------------------------- + + +def build_stack_for_round(items: list[tuple[str, str, colors.Color]], col_width: float) -> Drawing: + """ + items: list of (label, info, color) + """ + + rows = max(1, len(items)) + total_h = rows * RECT_H + d = Drawing(col_width, total_h) + y = total_h - RECT_H + + for label, info, col in items: + rect_w = max(2, col_width - RECT_PADDING_X * 2) - 4 + if 'proposed' in label: + # dashed border for proposed + d.add(Rect(0, y + 2, rect_w, RECT_H - 4, fillColor=col, strokeColor=colors.black, strokeWidth=1, strokeDashArray=[3, 2])) # type: ignore + else: + # solid border for committed + d.add(Rect(0, y + 2, rect_w, RECT_H - 4, fillColor=col, strokeColor=colors.black)) # type: ignore + + # text: two rows inside rectangle + text_x = RECT_PADDING_X + 3 + base_y = y + 4 + + d.add(String(text_x, base_y + 8, label, fontSize=RECT_LABEL_FONT)) + d.add(String(text_x, base_y, info, fontSize=RECT_INFO_FONT)) + + y -= RECT_H + + # empty case + if len(items) == 0: + rect_w = max(2, col_width - RECT_PADDING_X * 2) - 4 + d.add(Rect(0, total_h / 2 - 6, rect_w, 12, fillColor=colors.whitesmoke, strokeColor=colors.grey)) # type: ignore + d.add(String(RECT_PADDING_X + 2, total_h / 2 - 2, "no action", fontSize=RECT_LABEL_FONT)) + + return d + + +# ----------------------------- +# miniblock section +# ----------------------------- +def has_round_gap(rounds: list[int]) -> bool: + if len(rounds) < 2: + return False + for a, b in zip(rounds, rounds[1:]): + if b != a + 1: + return True + return False + + +def build_miniblock_section(miniblock: dict[str, Any], page_usable_width: float) -> list[Flowable]: + flow = [] + styles = getSampleStyleSheet() + + h = miniblock.get("hash", "") + sender = miniblock.get("senderShardID", "?") + receiver = miniblock.get("receiverShardID", "?") + txc = miniblock.get("txCount", "?") + typ = TYPE_NAMES.get(miniblock.get("type", -1), str(miniblock.get("type", "?"))) + + flow.append(Paragraph(f"Miniblock {h}", styles["Heading3"])) + flow.append(Paragraph(f"- from shard {sender} -> shard {receiver}
- tx_count: {txc}, type: {typ}", styles["BodyText"])) + flow.append(Spacer(1, 4)) + + mentioned = miniblock.get("mentioned", {}) + if not mentioned: + flow.append(Paragraph("No mentions found.", styles["BodyText"])) + flow.append(Spacer(1, 6)) + return flow + + rounds = sorted(mentioned.keys()) + + num_cols = max(1, len(rounds)) + col_width = page_usable_width / num_cols + + header = [ + Paragraph(f"round {r}", styles["BodyText"]) + for r in rounds + ] + + cells = [] + for r in rounds: + items = mentioned.get(r, []) + drawing = build_stack_for_round(items, col_width) + cells.append(drawing) + + tbl = Table( + [header, cells], + colWidths=[col_width] * num_cols, + hAlign="LEFT", + ) + + style = [ + ("GRID", (0, 0), (-1, -1), 0.25, colors.grey), + ("BACKGROUND", (0, 0), (-1, 0), colors.whitesmoke), + ("ALIGN", (0, 0), (-1, 0), "CENTER"), + ("FONTSIZE", (0, 0), (-1, 0), ROUND_HEADER_FONT), + ("VALIGN", (0, 1), (-1, -1), "TOP"), + ] + + if miniblock.get("hasAlarm", False): + style.append(("BOX", (0, 0), (-1, -1), 2, colors.red)) + + tbl.setStyle(TableStyle(style)) + + flow.append(tbl) + flow.append(Spacer(1, 8)) + return flow + + +# ----------------------------- +# PDF builder +# ----------------------------- +def build_pdf_from_miniblocks(epoch: int, miniblocks: list[dict[str, Any]], outname="miniblock_timeline_report.pdf"): + doc = SimpleDocTemplate( + outname, + pagesize=A4, + leftMargin=LEFT_MARGIN, + rightMargin=RIGHT_MARGIN, + topMargin=TOP_MARGIN, + bottomMargin=BOTTOM_MARGIN, + ) + + usable_width = PAGE_WIDTH - LEFT_MARGIN - RIGHT_MARGIN + MAX_PAGE_HEIGHT = PAGE_HEIGHT - TOP_MARGIN - BOTTOM_MARGIN + TITLE_HEIGHT = 75 + MINIBLOCK_WITH_2_ROWS = 135 + EXTRA_LINE_HEIGHT = 18 + + story = [] + current_height = 0 + first_page = True + styles = getSampleStyleSheet() + story.append(Paragraph(f"Miniblock Detail Report — Epoch {epoch}", styles["Title"])) + story.append(Spacer(1, 8)) + for mb in miniblocks: + # a miniblock with no mentions still occupies the base two-row height + num_rects = max((len(v) for v in mb.get("mentioned", {}).values()), default=0) + extra_lines = max(0, num_rects - 2) + + miniblock_height = MINIBLOCK_WITH_2_ROWS + extra_lines * EXTRA_LINE_HEIGHT + + # if first page, reserve title height + effective_page_height = MAX_PAGE_HEIGHT - (TITLE_HEIGHT if first_page else 0) + + if current_height + miniblock_height > effective_page_height: + story.append(PageBreak()) + current_height = 0 + first_page = False + + story.extend(build_miniblock_section(mb, usable_width)) + current_height += miniblock_height + + doc.build(story) + + +def main(): + + parser = argparse.ArgumentParser(description="Miniblock timeline detail report (CLI)") + group = parser.add_mutually_exclusive_group(required=True) + group.add_argument("--path", type=str, help="Path to run folder") + group.add_argument("--run-name", type=str, help="Name of the folder under ./Reports/") + + args = parser.parse_args() + + # resolve base path + if args.path: + base_path = args.path + else: + base_path = os.path.join("Reports", args.run_name) + + if not os.path.isdir(base_path): + print(f"Error: folder not found: {base_path}") + sys.exit(1) + + miniblocks_path = os.path.join(base_path, "Miniblocks", "miniblocks_report.json") + if not os.path.isfile(miniblocks_path): + print("Error: missing required file:") + print(" -", miniblocks_path) + sys.exit(1) + + # load JSON + with open(miniblocks_path, "r") as f: + data = json.load(f) + + # build the per-epoch report data + mb_data = MiniblockData(data["miniblocks"]).get_data_for_detail_report() + + # prepare output folder (keeps reports inside the run folder) + 
out_folder = os.path.join(base_path, "MiniblocksTimelineDetails") + os.makedirs(out_folder, exist_ok=True) + + # generate PDFs per epoch + for epoch in sorted(mb_data.keys()): + print(f"Epoch: {epoch}") + report_list = mb_data[epoch] + outpath = os.path.join(out_folder, f"miniblock_timeline_report_epoch_{epoch}.pdf") + build_pdf_from_miniblocks(int(epoch), report_list, outname=outpath) + print(f"Miniblock timeline report generated: {outpath}") + + +if __name__ == "__main__": + main() diff --git a/multiversx_cross_shard_analysis/test_decode_reserved.py b/multiversx_cross_shard_analysis/test_decode_reserved.py new file mode 100644 index 0000000..4c01d9d --- /dev/null +++ b/multiversx_cross_shard_analysis/test_decode_reserved.py @@ -0,0 +1,74 @@ +from multiversx_cross_shard_analysis.decode_reserved import \ decode_reserved_field + +mentioned_headers = { + "origin_shard_proposed_headers": "20ec12", + "origin_shard_committed_headers": "20ec12", + "dest_shard_proposed_headers_1": "1002208112", + "dest_shard_proposed_headers_2": "18821220ec12", + "dest_shard_committed_headers_1": "1002208112", + "dest_shard_committed_headers_2": "18821220ec12", + "meta_origin_shard_proposed_headers": "08011002208112", + "meta_dest_shard_proposed_headers": "08011002180a208112", + "meta_dest_shard_committed_headers": "", +} + +expected = { + "origin_shard_proposed_headers": { + "ExecutionType": "Normal", + "State": "Final", + "IndexOfFirstTxProcessed": 0, + "IndexOfLastTxProcessed": 2412 + }, + "origin_shard_committed_headers": { + "ExecutionType": "Normal", + "State": "Final", + "IndexOfFirstTxProcessed": 0, + "IndexOfLastTxProcessed": 2412 + }, + "dest_shard_proposed_headers_1": { + "ExecutionType": "Normal", + "State": "PartialExecuted", + "IndexOfFirstTxProcessed": 0, + "IndexOfLastTxProcessed": 2305 + }, + "dest_shard_proposed_headers_2": { + "ExecutionType": "Normal", + "State": "Final", + "IndexOfFirstTxProcessed": 2306, + "IndexOfLastTxProcessed": 2412 + }, + "dest_shard_committed_headers_1": { + "ExecutionType": "Normal", + "State": "PartialExecuted", + "IndexOfFirstTxProcessed": 0, + "IndexOfLastTxProcessed": 2305 + }, + "dest_shard_committed_headers_2": { + "ExecutionType": "Normal", + "State": "Final", + "IndexOfFirstTxProcessed": 2306, + "IndexOfLastTxProcessed": 2412 + }, + "meta_origin_shard_proposed_headers": { + "ExecutionType": "Scheduled", + "State": "PartialExecuted", + "IndexOfFirstTxProcessed": 0, + "IndexOfLastTxProcessed": 2305 + }, + "meta_dest_shard_proposed_headers": { + "ExecutionType": "Scheduled", + "State": "PartialExecuted", + "IndexOfFirstTxProcessed": 10, + "IndexOfLastTxProcessed": 2305 + }, + "meta_dest_shard_committed_headers": {} +} + + +class TestDecodeReserved: + + def test_decode_reserved_field(self): + for name, hex_str in mentioned_headers.items(): + print(f"Testing decoding for: {name}") + assert decode_reserved_field(hex_str, 2413) == expected[name], f"Decoding failed for {name}" diff --git a/multiversx_cross_shard_analysis/test_miniblocks.py b/multiversx_cross_shard_analysis/test_miniblocks.py new file mode 100644 index 0000000..df1f6f3 --- /dev/null +++ b/multiversx_cross_shard_analysis/test_miniblocks.py @@ -0,0 +1,236 @@ +import json +from enum import Enum + +from multiversx_cross_shard_analysis.header_structures import (Header, + HeaderData, + ShardData) +from multiversx_cross_shard_analysis.miniblock_data import MiniblockData + +header = { + "blockBodyType": 0, + "chainID": "31", + "epoch": 2, + 
"epochStartMetaHash": "", + "executionResults": [ + { + "accumulatedFees": "0", + "baseExecutionResult": { + "gasUsed": 0, + "headerEpoch": 2, + "headerHash": "be0081efbafed4be3738cdd02ab358a20e8e16f83bfe4b4d7858cfb868366f6b", + "headerNonce": 1647, + "headerRound": 1647, + "rootHash": "a55990c083e7868a6f567bfa3fa6ec9fa017f5eda34ee2b667ef7e55290d8259" + }, + "developerFees": "0", + "executedTxCount": 0, + "miniBlockHeaders": [], + "receiptsHash": "0e5751c026e543b2e8ab2eb06099daa1d1e5df47778f7787faab45cdf12fe3a8" + } + ], + "gasLimit": 0, + "lastExecutionResult": { + "executionResult": { + "gasUsed": 0, + "headerEpoch": 0, + "headerHash": "be0081efbafed4be3738cdd02ab358a20e8e16f83bfe4b4d7858cfb868366f6b", + "headerNonce": 1647, + "headerRound": 1647, + "rootHash": "a55990c083e7868a6f567bfa3fa6ec9fa017f5eda34ee2b667ef7e55290d8259" + }, + "notarizedInRound": 1648 + }, + "leaderSignature": "f25f5ffa015cb16b4173a17742142e6e7435999b3477cec0320c8892c92e72015453331a8c9c707a03d922f3514c7f0a", + "metaBlockHashes": [ + "ff0c0da960b7a41b7e3b4c6f702b427bcb493fb963dcce35074fa8ecb1391608" + ], + "miniBlockHeaders": [ + { + "hash": "52af8b3c899198e823ef94c80fc12cc4ba301e005d8e67f615ba872226a4963c", + "receiverShardID": 0, + "reserved": "1001", + "senderShardID": 0, + "txCount": 809, + "type": 0 + } + ], + "nonce": 1648, + "peerChanges": [], + "prevHash": "be0081efbafed4be3738cdd02ab358a20e8e16f83bfe4b4d7858cfb868366f6b", + "prevRandSeed": "b6e86481e0751eaf68c6505382ba783c028b837dbcb7c76db19ef14ec7df3d4a6268d161e7fe743c71473ccb89095691", + "randSeed": "018187ce3f41e8f126f8f2f3c336e98417faac23fc96a0d856ab04db20d4fcc58c632f94a6d8ff349eef42de47b82792", + "receiptsHash": "", + "reserved": "", + "round": 1648, + "shardID": 0, + "softwareVersion": "33", + "timestampMs": 1765297076800, + "txCount": 809 +} + +header_exec_result = { + "blockBodyType": 0, + "chainID": "31", + "epoch": 2, + "epochStartMetaHash": "", + "executionResults": [ + { + "accumulatedFees": "46517500000000000", + "baseExecutionResult": { + "gasUsed": 46517500, + "headerEpoch": 2, + "headerHash": "afbd732a8d4842a2bd6fc1edd466b1f8d8b67cbf8737301c83d9cde03f0e7cf0", + "headerNonce": 1648, + "headerRound": 1648, + "rootHash": "ca61fd6e23ff56a5c58016afd83d810b0fa77b1e39b945b2432696c73458ebf3" + }, + "developerFees": "0", + "executedTxCount": 809, + "miniBlockHeaders": [ + { + "hash": "4df428a4f8c34e62382d7bdbec08749188049959131c2acbd514edff1890b28e", + "receiverShardID": 1, + "reserved": "20a806", + "senderShardID": 0, + "txCount": 809, + "type": 0 + } + ], + "receiptsHash": "0e5751c026e543b2e8ab2eb06099daa1d1e5df47778f7787faab45cdf12fe3a8" + } + ], + "gasLimit": 0, + "lastExecutionResult": { + "executionResult": { + "gasUsed": 46517500, + "headerEpoch": 0, + "headerHash": "afbd732a8d4842a2bd6fc1edd466b1f8d8b67cbf8737301c83d9cde03f0e7cf0", + "headerNonce": 1648, + "headerRound": 1648, + "rootHash": "ca61fd6e23ff56a5c58016afd83d810b0fa77b1e39b945b2432696c73458ebf3" + }, + "notarizedInRound": 1649 + }, + "leaderSignature": "929e5b34dd6ae016deeab2667b67b41baccda435c01d5ad89c9b56b92db6fc526fe3f7d30ba043878ab091bfe7836392", + "metaBlockHashes": [ + "b782177d39d7495558992faed007536df77f576ed790d7117162a732ebcabd6a" + ], + "miniBlockHeaders": [ + { + "hash": "994ceb37eb426a123501928c8c5b67e59f607557fb5f332d5e55fd297ab5d870", + "receiverShardID": 0, + "reserved": "1001", + "senderShardID": 0, + "txCount": 1610, + "type": 0 + } + ], + "nonce": 1649, + "peerChanges": [], + "prevHash": "afbd732a8d4842a2bd6fc1edd466b1f8d8b67cbf8737301c83d9cde03f0e7cf0", + 
"prevRandSeed": "018187ce3f41e8f126f8f2f3c336e98417faac23fc96a0d856ab04db20d4fcc58c632f94a6d8ff349eef42de47b82792", + "randSeed": "1cefd79bab2fafda3ad83f665fd9aef5840b0736d3cf61dc2e5bc227dec81d3122b847ccf13289484bb15a2141e1ff08", + "receiptsHash": "", + "reserved": "", + "round": 1649, + "shardID": 0, + "softwareVersion": "33", + "timestampMs": 1765297077400, + "txCount": 1610 +} + + +class TestMiniBlockHeader: + def test_header_data(self): + header_data = HeaderData() + header_data.add_committed_header(header_exec_result) + assert header_data.header_dictionary['committed_headers'][0] == header_exec_result + + header_data.add_proposed_header(header_exec_result) + assert header_data.header_dictionary['proposed_headers'][0] == header_exec_result + + def test_header(self): + header_instance = Header(header_exec_result, 'committed') + assert header_instance.metadata['epoch'] == 2 + assert header_instance.metadata['round'] == 1649 + assert header_instance.metadata['shard_id'] == 0 + assert header_instance.metadata['nonce'] == 1649 + assert header_instance.isHeaderV3(header_exec_result) is True + assert len(header_instance.miniblocks) == 2 + + for mention_type, miniblock, metadata in header_instance.miniblocks: + assert mention_type in ["origin_shard_committed", "origin_shard_committed_exec"] + if mention_type == "origin_shard_committed": + assert miniblock['hash'] == "994ceb37eb426a123501928c8c5b67e59f607557fb5f332d5e55fd297ab5d870" + assert metadata['nonce'] == 1649 + elif mention_type == "origin_shard_committed_exec": + assert miniblock['hash'] == "4df428a4f8c34e62382d7bdbec08749188049959131c2acbd514edff1890b28e" + assert metadata['nonce'] == 1648 + + def test_shard_data(self): + header_data = HeaderData() + header_data.add_committed_header(header_exec_result) + header_data.add_proposed_header(header_exec_result) + shard_data = ShardData() + shard_data.add_node(header_data) + assert shard_data.parsed_headers[0].header_dictionary['committed_headers'][0] == header_exec_result + assert shard_data.parsed_headers[0].header_dictionary['proposed_headers'][0] == header_exec_result + assert len(shard_data.miniblocks) == 2 # two miniblocks in the header + + def test_nonce_timeline(self): + header_data = HeaderData() + + header_data.add_committed_header(header) + header_data.add_proposed_header(header) + + header_data.add_committed_header(header_exec_result) + header_data.add_proposed_header(header_exec_result) + + shard_data = ShardData() + shard_data.add_node(header_data) + + print("Miniblocks data:") + print(json.dumps(shard_data.miniblocks, indent=4)) + + timeline = shard_data.get_data_for_header_horizontal_report() + assert len(timeline) == 1 # one epoch + + print("Timeline data:") + print(json.dumps(timeline, indent=4, default=lambda o: o.name if isinstance(o, Enum) else str(o))) + + def test_nonce_timeline_new(self): + header_data = HeaderData() + + header_data.add_committed_header(header) + header_data.add_proposed_header(header) + + header_data.add_committed_header(header_exec_result) + header_data.add_proposed_header(header_exec_result) + + shard_data = ShardData() + shard_data.add_node(header_data) + + miniblock_data = MiniblockData(shard_data.miniblocks) + print("Miniblocks data:") + print(json.dumps(miniblock_data.miniblocks, indent=4)) + + timeline = miniblock_data.get_data_for_header_report() + + print("Timeline data:") + print(json.dumps(timeline, indent=4, default=lambda o: o.name if isinstance(o, Enum) else str(o))) + + def test_miniblock_data_verify(self): + header_data = HeaderData() 
+ + header_data.add_committed_header(header) + header_data.add_proposed_header(header) + + header_data.add_committed_header(header_exec_result) + header_data.add_proposed_header(header_exec_result) + + shard_data = ShardData() + shard_data.add_node(header_data) + + miniblock_data = MiniblockData(shard_data.miniblocks) + print("Miniblocks data:") + print(json.dumps(miniblock_data.miniblocks, indent=4)) diff --git a/multiversx_logs_parser_tools/__init__.py b/multiversx_logs_parser_tools/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/multiversx_logs_parser_tools/aho_corasik_parser.py b/multiversx_logs_parser_tools/aho_corasik_parser.py new file mode 100644 index 0000000..e571e74 --- /dev/null +++ b/multiversx_logs_parser_tools/aho_corasik_parser.py @@ -0,0 +1,49 @@ +from abc import ABC, abstractmethod +from typing import Any + +import ahocorasick +from ahocorasick import Automaton + +from .entry_parser import EntryParser + + +class AhoCorasickParser(ABC): + def __init__(self): + self.initialize_checker() + self.entry_parser = EntryParser(node_name='') + # Create the automaton & add patterns + self.automaton: Automaton = ahocorasick.Automaton() + for pattern, index in self.get_patterns(): + self.automaton.add_word(pattern, index) + self.automaton.make_automaton() + + @abstractmethod + def get_patterns(self) -> list[tuple[str, int]]: + return [] + + @abstractmethod + def initialize_checker(self) -> None: + pass + + @abstractmethod + def process_match(self, line: str, end_index: int, pattern_idx: int, args: dict[str, str]) -> dict[str, Any]: + result = {} + if self.should_parse_line(self.get_patterns()[pattern_idx][0]): + result = self.entry_parser.parse_log_entry(line) + return result + + @abstractmethod + def process_parsed_entry(self, parsed_entry: dict[str, Any], args: dict[str, str]) -> None: + pass + + @abstractmethod + def should_parse_line(self, pattern: str) -> bool: + pass + + def parse(self, file: list[str], args: dict[str, str]): + for line in file: + matches: list[tuple[int, int]] = list(self.automaton.iter(line)) + for end_index, pattern_idx in matches: + result = self.process_match(line, end_index, pattern_idx, args) + if result: + self.process_parsed_entry(result, args) diff --git a/multiversx_logs_parser_tools/archive_handler.py b/multiversx_logs_parser_tools/archive_handler.py new file mode 100644 index 0000000..c0d5db4 --- /dev/null +++ b/multiversx_logs_parser_tools/archive_handler.py @@ -0,0 +1,74 @@ + +import argparse +import re +import zipfile +from typing import TypeVar + +from .aho_corasik_parser import AhoCorasickParser +from .helpers import validate_file_path +from .node_logs_checker import NodeLogsChecker + +P = TypeVar("P", bound=AhoCorasickParser) + + +class ArchiveHandler: + def __init__(self, checker: NodeLogsChecker[P], logs_path: str): + self.logs_path = logs_path + zip_name_pattern = r'.*/(.*?)\.zip' + match = re.match(zip_name_pattern, self.logs_path) + self.run_name = match.group(1) if match else 'unknown-zip-name' + self.checker = checker + + def handle_logs(self): + """Loop through nodes in the zip file and process logs for each node.""" + # Open the zip file and process tar.gz files inside it that each correspond to a node + + with zipfile.ZipFile(self.logs_path, 'r') as zip_file: + # List all files inside the zip + file_list = zip_file.namelist() + + for file_name in file_list: + if file_name.endswith(".tar.gz"): + node_name = file_name.replace(".tar.gz", "").rsplit("--", 1)[1] + 
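# node_name is the "--"-suffix of the archive entry name (e.g. a hypothetical "run--node-0.tar.gz" yields "node-0") + 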
print(f"Processing node {node_name}") + + # Open the tar.gz file as bytes + with zip_file.open(file_name) as tar_file_io: + args = argparse.Namespace( + node_name=node_name, + run_name=self.run_name, + ) + self.checker.reset_node(args) + self.checker.handle_node_from_archive(tar_file_io) + self.checker.post_process_node_logs() + self.process_node_data() + self.process_run_data() + + def process_node_data(self): + """Process the parsed data for a single node.""" + pass + + def process_run_data(self): + """Process the parsed data for the entire run.""" + pass + + @staticmethod + def get_path() -> argparse.Namespace: + parser = argparse.ArgumentParser( + description=''' + Runs node log checks. Example script: + + python ansible/templates/logs-checker/archive_handler.py --path=logsPath/logs_archive.zip + ''', + epilog='\n', + formatter_class=argparse.RawTextHelpFormatter + ) + + parser.add_argument( + '--path', + required=True, + type=validate_file_path, + help='Path to the run zip file.' + ) + + return parser.parse_args() diff --git a/multiversx_logs_parser_tools/entry_parser.py b/multiversx_logs_parser_tools/entry_parser.py new file mode 100644 index 0000000..efeceb6 --- /dev/null +++ b/multiversx_logs_parser_tools/entry_parser.py @@ -0,0 +1,83 @@ +import json +import re +from typing import Any + +log_entry_pattern = re.compile( + r'^(?PWARN|ERROR|DEBUG|TRACE|INFO)\s*\[' # Log level + r'(?P[^\]]+)\]\s*' # Timestamp + r'\[(?P[^\]]+)\]\s*' # Logger name + r'\[(?P[^\]]*)\]\s*' # context inside brackets + r'(?P.*)$', # The rest of the log message + re.MULTILINE +) + +context_pattern = re.compile( + r'(?P\S+)/(?P\d+)/(?P\d+)(?:/\((?P[^\)]+)\))?/?') + +separator = ' ' + + +class EntryParser: + ''' + Parses entries with the format: + log_level [2025-04-29 07:46:37.102] [logger] [shard/epoch/round/(subround)] entry_content + + The context [shard/epoch/round/(subround)] can be either fully formed like in '0/4/805/(END_ROUND)' or 'metachain/13/2648/(START_ROUND)' + or partially formed, like in '/0/0/', 'metachain/2/400/' + + The content of the entry is separated using the predefined separator. 
If the separator is not present, and a distinction cannot be made + between the message and parameters, it returns the entire entry content as message + + ''' + + def __init__(self, node_name: str): + self.alerts = [] + + def parse_context(self, context: str) -> dict[str, Any]: + # Parse shard, epoch, round, subround from context + context_match = context_pattern.match(context) + if context_match: + subround = context_match.group('subround') + return {'shard': context_match.group('shard').strip(), 'epoch': context_match.group('epoch').strip(), + 'round': context_match.group('round').strip(), 'subround': subround.strip() if subround else ''} + else: + return {'shard': '', 'epoch': 0, 'round': 0, 'subround': ''} + + def parse_message(self, message: str): + if separator in message: + # if the separator is present, split the content between message and parameters using the separator + message, parameters = message.split(separator, 1) + return message.strip(), parameters.strip() + + elif ' = ' in message: + # if no separator, but the content includes '=', assume first parameter is the word before the '=' and split before that word + message_parts = message.split(' = ', 1) + message, first_parameter_label = message_parts[0].rsplit(' ', 1) + return message.strip(), first_parameter_label.strip() + ' = ' + message_parts[1].strip() + + else: + # no parameters in the entry or cannot determine if there are parameters present + return message.strip(), '' + + def parse_log_entry(self, log_content: str) -> dict[str, str]: + data = {} + match = log_entry_pattern.search(log_content) + if match: + data = match.groupdict() + context = self.parse_context(data.pop('context')) + data.update(context) + + message, parameters = self.parse_message(match['message']) + data['message'] = message + data['parameters'] = parameters + + return data + + +if __name__ == "__main__": + content = 'DEBUG[2025-11-11 17:09:06.028] [..nsus/spos/bls/v1] [metachain/0/3/(BLOCK)] Proposed header received v1 header = {"accumulatedFees": 
"0","accumulatedFeesInEpoch":"0","chainID":"6c6f63616c2d746573746e6574","devFeesInEpoch":"0","developerFees":"0","epoch":0,"epochStart":{"economics":{"nodePrice":null,"prevEpochStartHash":"","prevEpochStartRound":0,"rewardsForProtocolSustainability":null,"rewardsPerBlock":null,"totalNewlyMinted":null,"totalSupply":null,"totalToDistribute":null},"lastFinalizedHeaders":[]},"leaderSignature":"","miniBlockHeaders":[],"nonce":3,"peerInfo":[],"prevHash":"bbc1249e07d98aabfdb3e735e35142800df013694780497df76778a27db62033","prevRandSeed":"8b6a73f9f4d34f9355cd4399f8c6f14e1296184ea32636ebd58709d62bd35bbe6b992dd3104fa0a64e5fcff9398e0502","pubKeysBitmap":"","randSeed":"41e9c758555f4a33f5de954d6e7cc2d252cacf3ddadfd3fbac1d9ddca2382681e56ab30c65029b29212eb69b13a47906","receiptsHash":"0e5751c026e543b2e8ab2eb06099daa1d1e5df47778f7787faab45cdf12fe3a8","reserved":"","rootHash":"e4a0f900f1ea487a61d3832776d05871e81f7975670a69b1febbf212f4cea5cc","round":3,"shardInfo":[{"accumulatedFees":"0","developerFees":"0","epoch":0,"headerHash":"8da01a5fbda1484d915740b824ebb1de11722ae501e3c8a4e21d9ce96d1a4c1d","lastIncludedMetaNonce":0,"nonce":1,"numPendingMiniBlocks":0,"prevHash":"00fd532ea2e896b86cd70e189c5e716fcfaaac8b7a060e75421d369c19db78e3","prevRandSeed":"f501347f089b236bdd605babb0e27af0b7df76e45628d35072bc87eec4178c0c","pubKeysBitmap":"07","round":1,"shardID":0,"shardMiniBlockHeaders":[],"signature":"","txCount":0},{"accumulatedFees":"0","developerFees":"0","epoch":0,"headerHash":"3897723b58aa6e1949a415a54598e5655363b4a4c967827ef045326f1ca9e216","lastIncludedMetaNonce":0,"nonce":1,"numPendingMiniBlocks":0,"prevHash":"5a62b8bd0aa019e8967dbdc446ab610b98bf59525cc0672e77ea8e74d3e6a3b3","prevRandSeed":"e626f319e5c70a6c3b4e96258a635b8f7e1efb224c285c830cbb4e473b187c1a","pubKeysBitmap":"07","round":1,"shardID":1,"shardMiniBlockHeaders":[],"signature":"","txCount":0}],"signature":"","softwareVersion":"64656661756c74","timeStamp":1762873746,"txCount":0,"validatorStatsRootHash":"d3f82f56f69f4c26a913a8a5721dfdd85cdd70ad4efdfb464c4f1f6ddd4f8dea"} ' + result = EntryParser('').parse_log_entry(content) + parameter = result.pop('parameters').split(' = ', 1)[1] + header = json.loads(parameter) + print(json.dumps(result, indent=4)) + print(json.dumps(header, indent=4)) diff --git a/multiversx_logs_parser_tools/helpers.py b/multiversx_logs_parser_tools/helpers.py new file mode 100644 index 0000000..51cec21 --- /dev/null +++ b/multiversx_logs_parser_tools/helpers.py @@ -0,0 +1,20 @@ +import argparse +import os +from typing import Any + + +def extend_dict(dict_to_extend: dict[str, Any], key: str): + if key not in dict_to_extend.keys(): + dict_to_extend[key] = {} + + +def validate_file_path(path: str): + if not os.path.isfile(path): + raise argparse.ArgumentTypeError(f"File '{path}' does not exist.") + return path + + +def validate_folder_path(path: str): + if not os.path.isdir(path): + raise argparse.ArgumentTypeError(f"Folder '{path}' does not exist.") + return path diff --git a/multiversx_logs_parser_tools/node_logs_checker.py b/multiversx_logs_parser_tools/node_logs_checker.py new file mode 100644 index 0000000..e8696ab --- /dev/null +++ b/multiversx_logs_parser_tools/node_logs_checker.py @@ -0,0 +1,91 @@ + +try: + # When this module is imported as part of a package + from .aho_corasik_parser import AhoCorasickParser +except Exception: + # Fallback when running the script directly (not as a package) + from aho_corasik_parser import AhoCorasickParser + +import argparse +import json +import os +import tarfile +from pathlib import Path 
+from typing import IO, Any, Generic, Type, TypeVar + + +P = TypeVar("P", bound=AhoCorasickParser) + + +class NodeLogsChecker(Generic[P]): + """Abstract Base Class for Node Logs Checker.""" + + def __init__(self, parser_cls: Type[P], args: argparse.Namespace): + self.parser: P = parser_cls() + self.initialize_checker(args) + + def process_parsed_result(self): + """Process the result of parsing a .log file for the given node.""" + pass + + def post_process_node_logs(self): + """Post-process the node logs after all log files have been parsed.""" + pass + + def initialize_checker(self, args: argparse.Namespace): + pass + + def create_json_for_node(self) -> dict[str, Any]: + return {} + + def reset_node(self, args: argparse.Namespace): + self.node_name = args.node_name if args.node_name else 'unknown-node' + self.run_name = args.run_name if args.run_name else 'unknown-run' + + def handle_node_from_archive(self, tar_gz_contents: IO[bytes]): + with tarfile.open(fileobj=tar_gz_contents, mode='r:gz') as logs_archive: + # sort logs in alphabetic/chronological order + sorted_members = sorted( + logs_archive.getmembers(), + key=lambda member: member.name + ) + + # process all log files for the node + for member in sorted_members: + if member.name.startswith('logs/logs/') and member.name.endswith('.log'): + print(" Processing log file:", member.name) + raw_data = logs_archive.extractfile(member) + if not raw_data: + continue + + with raw_data as f: + # Decode and pass an iterable (a list of lines) + log_lines = [line.decode("utf-8") for line in f] + self.parser.parse(log_lines, {}) + self.process_parsed_result() + + def handle_node_from_folder(self, node_logs_path: str): + files = sorted(Path(node_logs_path).glob('*.log')) + for file in files: + with open(file, 'r') as f: + log_lines = f.readlines() + self.parser.parse(log_lines, {}) + self.process_parsed_result() + + def write_node_json(self, path=''): + if not path: + node_reports_path = f'./Reports/{self.run_name}/Nodes' + output_file = Path(f'{node_reports_path}/{self.node_name}_report.json') + directory = os.path.dirname(output_file) + Path(directory).mkdir(parents=True, exist_ok=True) + else: + output_file = Path(path + f'/{self.node_name}_report.json') + with open(output_file, "w") as json_file: + json.dump(self.create_json_for_node(), json_file, indent=4) + + @classmethod + def from_args(cls: Type['NodeLogsChecker[P]'], parser_cls: Type[P], args: argparse.Namespace) -> 'NodeLogsChecker[P]': + instance = cls(parser_cls, args) + return instance diff --git a/pyrightconfig.json b/pyrightconfig.json new file mode 100644 index 0000000..8b3ea3e --- /dev/null +++ b/pyrightconfig.json @@ -0,0 +1,16 @@ +{ + "include": [], + "exclude": [ + "**/__pycache__" + ], + "ignore": [], + "defineConstant": { + "DEBUG": true + }, + "venvPath": ".", + "venv": "venv", + "stubPath": "", + "reportMissingImports": true, + "reportMissingTypeStubs": false, + "reportUnknownParameterType": true +} diff --git a/requirements-dev.txt b/requirements-dev.txt new file mode 100644 index 0000000..ba07244 --- /dev/null +++ b/requirements-dev.txt @@ -0,0 +1,4 @@ +autopep8 +flake8 +pytest +pyright diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..c6609dc --- /dev/null +++ b/requirements.txt @@ -0,0 +1 @@ +pyahocorasick==2.2.0