2 changes: 2 additions & 0 deletions .flake8
@@ -0,0 +1,2 @@
[flake8]
ignore = E501, E722
6 changes: 6 additions & 0 deletions .gitignore
@@ -6,6 +6,12 @@ __pycache__/
# C extensions
*.so

# JSON files
*.json

# PDF files
*.pdf

# Distribution / packaging
.Python
build/
19 changes: 19 additions & 0 deletions .pre-commit-config.yaml
@@ -0,0 +1,19 @@
repos:
  - repo: https://github.com/pre-commit/pre-commit-hooks
    rev: v4.4.0
    hooks:
      - id: trailing-whitespace
      - id: end-of-file-fixer
      - id: check-yaml

  - repo: https://github.com/PyCQA/flake8
    rev: 7.0.0
    hooks:
      - id: flake8
        args:
          - "--config=.flake8"

  - repo: https://github.com/pycqa/isort
    rev: 5.13.2
    hooks:
      - id: isort
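
With this configuration in place, the hooks can be enabled locally with the standard pre-commit workflow (commands shown for context, not part of this diff):

```
pip install pre-commit
pre-commit install
pre-commit run --all-files
```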
2 changes: 2 additions & 0 deletions .vscode/ltex.dictionary.en-US.txt
@@ -0,0 +1,2 @@
AhoCorasikParser
NodeLogsChecker
61 changes: 60 additions & 1 deletion README.md
@@ -1,2 +1,61 @@
# mx-chain-logs-parser
Logs parsing utilities.
Logs parsing utilities and applications.

## LOGS PARSER TOOLS
The tool provides general abstract classes that are useful for parsing logs.
To create an application that parses log files off-line, inherit these classes and implement their methods for the particular use case, as in the sketch below.
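
A minimal sketch of the pattern, mirroring the wiring in gather_data.py from this PR (`MyParser`, `MyChecker` and `MyHandler` stand in for hypothetical subclasses of the classes described below):

```
# Hypothetical subclasses of EntryParser, NodeLogsChecker and ArchiveHandler,
# with the run- and node-level abstract methods implemented for the use case.
args = MyHandler.get_path()              # parses the --path argument, as in gather_data.py
checker = MyChecker(MyParser, args)      # node-level processing
handler = MyHandler(checker, args.path)  # loops over the nodes in the archive
handler.handle_logs()                    # runs the analysis over the whole archive
```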

### ARCHIVE HANDLER
- General application processing class that loops through the nodes in the downloaded logs archive and calls its NodeLogsChecker instance for each one of them
- run-level methods should be implemented in inheriting classes

### NODE LOGS CHECKER
- Node-level processing that loops through the individual log files of a node and calls its AhoCorasikParser instance to search for entries with pre-defined key phrases
- node-level methods should be implemented in inheriting classes

### AHO-CORASIK PARSER
- Log-level processing implementing the Aho-Corasick algorithm, which searches for a list of given keywords simultaneously. It uses an *EntryParser* to extract information from the entries of interest
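
For reference, a self-contained sketch of the Aho-Corasick idea (independent of this repository's AhoCorasikParser, which additionally delegates matched entries to an EntryParser):

```
from collections import deque


def build_automaton(keywords):
    # Trie as a list of dicts, plus failure links and per-node output sets.
    trie, fail, out = [{}], [0], [set()]
    for word in keywords:
        node = 0
        for ch in word:
            if ch not in trie[node]:
                trie.append({})
                fail.append(0)
                out.append(set())
                trie[node][ch] = len(trie) - 1
            node = trie[node][ch]
        out[node].add(word)
    # BFS from the root's children to compute failure links.
    queue = deque(trie[0].values())
    while queue:
        node = queue.popleft()
        for ch, child in trie[node].items():
            queue.append(child)
            f = fail[node]
            while f and ch not in trie[f]:
                f = fail[f]
            fail[child] = trie[f].get(ch, 0)
            out[child] |= out[fail[child]]
    return trie, fail, out


def search(text, keywords):
    # A single pass over the text matches every keyword simultaneously.
    trie, fail, out = build_automaton(keywords)
    node = 0
    for i, ch in enumerate(text):
        while node and ch not in trie[node]:
            node = fail[node]
        node = trie[node].get(ch, 0)
        for word in out[node]:
            yield i - len(word) + 1, word


# search("ushers", ["he", "she", "his", "hers"]) finds
# "she" at position 1, "he" at 2 and "hers" at 2.
```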

### ENTRY PARSER
- Entry-level processing that divides the log entry into its basic components: log level, context, message, parameters
- can be extended with regular-expression (re) recognition to handle specific cases, as in the sketch below
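
A sketch of what an re-based extension could look like; the entry layout assumed here (`LEVEL[timestamp] [context] message key = value ...`) is illustrative, not the actual node log format:

```
import re

# Assumed, illustrative entry layout:
#   DEBUG[2024-01-01 12:00:00.000] [process/block] block committed nonce = 42 shard = 0
ENTRY_RE = re.compile(
    r"^(?P<level>[A-Z]+)\s*\[(?P<timestamp>[^\]]+)\]\s*"
    r"\[(?P<context>[^\]]+)\]\s*(?P<rest>.*)$"
)


def parse_entry(line):
    match = ENTRY_RE.match(line)
    if match is None:
        return None
    # Split the remainder into the free-text message and the "key = value" parameters.
    parts = re.split(r"\s+(?=\w+\s*=)", match.group("rest"), maxsplit=1)
    return {
        "level": match.group("level"),
        "context": match.group("context"),
        "message": parts[0].strip(),
        "parameters": parts[1] if len(parts) > 1 else "",
    }
```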


## CROSS SHARD ANALYSIS TOOL
This tool validates that cross-shard mini-blocks are executed (and proposed) in strict order, without gaps or duplications.
It uses color-coded data to illustrate each state in the processing. A configuration file (issues.py) is provided to flag specific issues with mini-block production.
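
The core invariant can be illustrated in a few lines (a simplified sketch, not the tool's actual implementation):

```
def find_order_issues(nonces):
    # Simplified: consecutive sequence numbers must increase by exactly 1.
    issues = []
    for prev, curr in zip(nonces, nonces[1:]):
        if curr == prev:
            issues.append(f"duplication at {curr}")
        elif curr != prev + 1:
            issues.append(f"gap between {prev} and {curr}")
    return issues


print(find_order_issues([1, 2, 2, 4]))  # ['duplication at 2', 'gap between 2 and 4']
```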

The tool creates a run-specific folder under Reports that includes parsed headers in the *Shards* subfolder and mini-blocks in the *Miniblocks* subfolder.
The generated reports are also included in this folder, in individual sub-folders named after the respective report:
- **MiniblocksShardTimeline** contains a report that goes through rounds and displays which mini-blocks were proposed, executed or notarized for each shard; individual PDF files are generated for each epoch;
- **MiniblocksTimelineDetails** produces a timeline of mini-blocks for each shard; the mini-block type and other information is included for each one of them;
- **NonceTimeline** produces a timeline of the headers processed, originating from each shard; alarms, such as round gaps or missing headers, are represented by colored borders;
- **NonceAlarms** is similar to the NonceTimeline report, but only includes headers that have issues. The report is divided into chapters for each type of alarm; a header may be included in more than one such category, depending on its characteristics.

## INSTALL
Create a virtual environment and install the dependencies:

```
python3 -m venv ./venv
source ./venv/bin/activate
pip install -r ./requirements.txt --upgrade
export PYTHONPATH=.
```

## INSTALL DEVELOPMENT DEPENDENCIES
```
pip install -r ./requirements-dev.txt --upgrade
```

## EXAMPLE USAGE
```
python -m multiversx_cross_shard_analysis.gather_data --path ~/Downloads/cross-shard-execution-anal-9afe696daf.zip
```
where the mandatory --path argument is the path to the zip file containing the logs.
The command also generates all available reports, saving them inside a subfolder of Reports named after the zip file provided.

To run a specific report individually:
```
python -m multiversx_cross_shard_analysis.headers_timeline_report --run-name cross-shard-execution-anal-6cc663f7af
```
where --run-name is the name of the subfolder where the run's files reside.
Empty file.
121 changes: 121 additions & 0 deletions multiversx_cross_shard_analysis/constants.py
@@ -0,0 +1,121 @@
from enum import Enum

from reportlab.lib import colors

origin_shard = "origin_shard"
dest_shard = "dest_shard"
meta = "meta"
proposed = "proposed"
committed = "committed"

MiniBlockTypes = Enum("MiniBlockType", [
    'MiniBlockHeaders',
    'ShardInfo',
    'ExecutionResults'
])

MentionType = Enum("MentionType", [
    # miniblock is mentioned in origin shard header
    "origin_shard_proposed",
    "origin_shard_committed",

    # miniblock is mentioned in an execution result, either on origin or destination shard
    "origin_exec_proposed",
    "origin_exec_committed",

    # notarization of shard miniblock when meta includes the shard header
    "meta_origin_shard_proposed",
    "meta_origin_shard_committed",

    # miniblock is mentioned in destination shard header
    "dest_shard_proposed",
    "dest_shard_committed",

    # miniblock is mentioned in an execution result, either on origin or destination shard
    "dest_exec_proposed",
    "dest_exec_committed",

    # notarization of shard miniblock when meta includes the shard header
    "meta_dest_shard_proposed",
    "meta_dest_shard_committed",

    # notarization of execution results when meta includes the header containing the execution result for origin shard
    "meta_origin_exec_proposed",
    "meta_origin_exec_committed",

    # notarization of execution results when meta includes the header containing the execution result for destination shard
    "meta_dest_exec_proposed",
    "meta_dest_exec_committed",
])


# Mappings from field number to field name for MiniBlockHeaderReserved
FIELD_NAME_MAPPING = {
    1: "ExecutionType",
    2: "State",
    3: "IndexOfFirstTxProcessed",
    4: "IndexOfLastTxProcessed",
}

# Mappings for enum values from block.proto
PROCESSING_TYPE_MAPPING = {
    0: "Normal",
    1: "Scheduled",
    2: "Processed",
}

# Mappings for miniblock state enum values from block.proto
MINIBLOCK_STATE_MAPPING = {
    0: "Final",
    1: "Proposed",
    2: "PartialExecuted",
}

# type names
TYPE_NAMES = {
    0: "TxBlock",
    30: "StateBlock",
    60: "PeerBlock",
    90: "SCResultBlock",
    120: "InvalidBlock",
    150: "ReceiptBlock",
    255: "RewardsBlock",
}

Colors = Enum("Colors", [
    "origin_proposed",
    "origin_partial_executed",
    "origin_final",
    "dest_proposed",
    "dest_partial_executed",
    "dest_final",
    "meta_origin_committed",
    "meta_dest_committed",
    "origin_exec_proposed",
    "origin_exec_partial_executed",
    "origin_exec_final",
    "dest_exec_proposed",
    "dest_exec_partial_executed",
    "dest_exec_final",
    "meta_origin_exec_committed",
    "meta_dest_exec_committed",
])

COLORS_MAPPING = {
    Colors.origin_proposed: colors.lightyellow,
    Colors.origin_partial_executed: colors.orange,
    Colors.origin_final: colors.yellow,
    Colors.dest_proposed: colors.mistyrose,
    Colors.dest_partial_executed: colors.palevioletred,
    Colors.dest_final: colors.pink,
    Colors.meta_origin_committed: colors.lightgreen,
    Colors.meta_dest_committed: colors.lightblue,
    Colors.origin_exec_proposed: colors.khaki,
    Colors.origin_exec_partial_executed: colors.gold,
    Colors.origin_exec_final: colors.yellow,
    Colors.dest_exec_proposed: colors.lightcoral,
    Colors.dest_exec_partial_executed: colors.crimson,
    Colors.dest_exec_final: colors.pink,
    Colors.meta_origin_exec_committed: colors.mediumseagreen,
    Colors.meta_dest_exec_committed: colors.cornflowerblue,
}
66 changes: 66 additions & 0 deletions multiversx_cross_shard_analysis/decode_reserved.py
@@ -0,0 +1,66 @@

from typing import Any

from multiversx_cross_shard_analysis.constants import (FIELD_NAME_MAPPING,
                                                       MINIBLOCK_STATE_MAPPING,
                                                       PROCESSING_TYPE_MAPPING)


def get_default_decoded_data(tx_count: int) -> dict[str, Any]:
    """
    Returns a dictionary with the default values for the MiniBlockHeaderReserved struct.
    """
    return {
        "ExecutionType": "Normal",
        "State": "Final",
        "IndexOfFirstTxProcessed": 0,
        "IndexOfLastTxProcessed": tx_count - 1 if tx_count > 0 else 0,
    }


def decode_reserved_field(hex_string: str, tx_count: int) -> dict[str, Any]:
    """
    Decodes the reserved field from a hex string into a human-readable dictionary,
    including default values for missing fields.
    """
    decoded_data = get_default_decoded_data(tx_count)

    if not hex_string:
        return {}

    byte_data = bytes.fromhex(hex_string)
    i = 0
    while i < len(byte_data):
        # Protobuf tag byte: upper 5 bits are the field number, lower 3 the wire type.
        field_and_type = byte_data[i]
        field_number = field_and_type >> 3
        wire_type = field_and_type & 0x07
        i += 1

        if wire_type == 0:  # Varint: 7 payload bits per byte, MSB is the continuation flag
            value = 0
            shift = 0
            while True:
                if i >= len(byte_data):
                    decoded_data["error"] = "Incomplete varint data"
                    return decoded_data
                byte = byte_data[i]
                value |= (byte & 0x7F) << shift
                i += 1
                if not (byte & 0x80):
                    break
                shift += 7

            field_name = FIELD_NAME_MAPPING.get(field_number, f"UnknownField_{field_number}")

            if field_name == "ExecutionType":
                decoded_data[field_name] = PROCESSING_TYPE_MAPPING.get(value, f"UnknownProcessingType_{value}")
            elif field_name == "State":
                decoded_data[field_name] = MINIBLOCK_STATE_MAPPING.get(value, f"UnknownState_{value}")
            else:
                decoded_data[field_name] = value

        else:
            decoded_data["error"] = f"Unsupported wire type: {wire_type}"
            break

    return decoded_data
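
A worked example of the decoder on a hand-built payload (the hex values are constructed here for illustration; the expected output follows from the mappings in constants.py):

```
# Tag 0x08 = field 1 (ExecutionType), wire type 0; varint 0x01 -> "Scheduled".
# Tag 0x10 = field 2 (State), wire type 0; varint 0x02 -> "PartialExecuted".
print(decode_reserved_field("08011002", tx_count=5))
# {'ExecutionType': 'Scheduled', 'State': 'PartialExecuted',
#  'IndexOfFirstTxProcessed': 0, 'IndexOfLastTxProcessed': 4}
```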
76 changes: 76 additions & 0 deletions multiversx_cross_shard_analysis/gather_data.py
@@ -0,0 +1,76 @@
import os
from datetime import datetime, timedelta

from multiversx_cross_shard_analysis.headers_timeline_report import \
    build_nonce_timeline_pdf
from multiversx_cross_shard_analysis.miniblock_data import MiniblockData
from multiversx_cross_shard_analysis.miniblocks_round_report import \
    build_report
from multiversx_cross_shard_analysis.miniblocks_timeline_report import \
    build_pdf_from_miniblocks

from .headers_alarms_report import build_nonce_alarms_timeline_pdf
from .header_analysis_archive_handler import HeaderAnalysisArchiveHandler
from .header_analysis_checker import HeaderAnalysisChecker
from .header_analysis_parser import HeaderAnalysisParser


def gather_data():
    time_started = datetime.now()
    print('Starting cross-shard analysis...')
    args = HeaderAnalysisArchiveHandler.get_path()
    header_checker = HeaderAnalysisChecker(HeaderAnalysisParser, args)
    handler = HeaderAnalysisArchiveHandler(header_checker, args.path)
    handler.handle_logs()
    print(f'Archive checked successfully: {timedelta(seconds=(datetime.now() - time_started).total_seconds())}s')

    # Generate reports
    mb_data = MiniblockData(handler.shard_data.miniblocks).get_data_for_round_report()
    out_folder = os.path.join(handler.run_name, "MiniblocksShardTimeline")
    out_folder = os.path.join('Reports', out_folder)
    os.makedirs(out_folder, exist_ok=True)

    # generate PDFs per epoch
    for epoch in sorted(mb_data.keys()):
        print(f"Epoch: {epoch}")
        report_dict = mb_data[epoch]
        outfile = os.path.join(out_folder, f"shards_timeline_report_{epoch}.pdf")
        build_report(int(epoch), report_dict, shards=[0, 1, 2, 4294967295], outname=outfile)
        print("→", outfile)

    mb_data = MiniblockData(handler.shard_data.miniblocks).get_data_for_detail_report()
    out_folder = os.path.join(handler.run_name, "MiniblocksTimelineDetail")
    out_folder = os.path.join('Reports', out_folder)
    os.makedirs(out_folder, exist_ok=True)

    for epoch in sorted(mb_data.keys()):
        print(f"Epoch: {epoch}")
        outfile = os.path.join(out_folder, f"miniblock_timeline_report_epoch_{epoch}.pdf")
        build_pdf_from_miniblocks(int(epoch), mb_data[epoch], outname=outfile)
        print("→", outfile)

    input_data, nonce_alarms = MiniblockData(handler.shard_data.miniblocks).get_data_for_header_report()
    out_folder = os.path.join(handler.run_name, "NonceTimeline")
    out_folder = os.path.join('Reports', out_folder)
    os.makedirs(out_folder, exist_ok=True)

    for epoch in sorted(input_data.keys()):
        print(f"Epoch: {epoch}")
        outfile = os.path.join(out_folder, f"nonce_timeline_report_{epoch}.pdf")
        build_nonce_timeline_pdf(input_data[epoch], nonce_alarms, outname=outfile)
        print("→", outfile)

    input_data = MiniblockData(handler.shard_data.miniblocks).get_data_for_header_alarms_report()
    out_folder = os.path.join(handler.run_name, "NonceAlarms")
    out_folder = os.path.join('Reports', out_folder)
    os.makedirs(out_folder, exist_ok=True)

    for epoch in sorted(input_data.keys()):
        print(f"Epoch: {epoch}")
        outfile = os.path.join(out_folder, f"nonce_alarms_report_{epoch}.pdf")
        build_nonce_alarms_timeline_pdf(input_data[epoch], outname=outfile)
        print("→", outfile)


if __name__ == "__main__":
    gather_data()