From 00b8a88e974c7bf5c01df3a4ee3963715d7f28ab Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Thu, 29 Jan 2026 21:40:49 +0000 Subject: [PATCH 1/4] Initial plan From d5a9a2232c7d1353536a882051618a9592baca11 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Thu, 29 Jan 2026 21:44:44 +0000 Subject: [PATCH 2/4] Add detailed I/O data collection and injection into user namespace - Added pandas as dependency - Enhanced parse_strace_line and parse_fs_usage_line to collect detailed data (path, operation, syscall, size_bytes) - Added -y flag to strace for file path extraction - Modified measure_macos_osascript and measure_linux_strace to collect detailed data - Updated magic.py to inject iops_detailed_data variable into user namespace - DataFrame when detailed data available - String message when using psutil mode Co-authored-by: mtauraso <31012+mtauraso@users.noreply.github.com> --- pyproject.toml | 1 + src/iops_profiler/collector.py | 104 ++++++++++++++++++++++++++------- src/iops_profiler/magic.py | 29 ++++++++- 3 files changed, 110 insertions(+), 24 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 28392e1..8c88f09 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -28,6 +28,7 @@ dependencies = [ "psutil", "matplotlib", "numpy", + "pandas", ] [project.urls] diff --git a/src/iops_profiler/collector.py b/src/iops_profiler/collector.py index 47d8348..1715760 100644 --- a/src/iops_profiler/collector.py +++ b/src/iops_profiler/collector.py @@ -66,28 +66,31 @@ def __init__(self, shell): self._io_syscalls = set(STRACE_IO_SYSCALLS) @staticmethod - def parse_fs_usage_line_static(line, byte_pattern=None, collect_ops=False): + def parse_fs_usage_line_static(line, byte_pattern=None, collect_ops=False, collect_detailed=False): """Parse a single fs_usage output line for I/O operations (static version) Args: line: The line to parse byte_pattern: Compiled regex pattern for extracting byte count (optional) collect_ops: If True, return full operation info for histogram collection + collect_detailed: If True, return detailed info including path and syscall Returns: If collect_ops is False: (op_type, bytes_transferred) If collect_ops is True: {'type': op_type, 'bytes': bytes_transferred} + If collect_detailed is True: {'path': str, 'operation': str, 'syscall': str, 'size_bytes': int} """ parts = line.split() if len(parts) < 2: - return None if collect_ops else (None, 0) + return None if (collect_ops or collect_detailed) else (None, 0) - syscall = parts[1].lower() + syscall_raw = parts[1] + syscall = syscall_raw.lower() is_read = "read" in syscall is_write = "write" in syscall if not (is_read or is_write): - return None if collect_ops else (None, 0) + return None if (collect_ops or collect_detailed) else (None, 0) # Extract byte count from B=0x[hex] pattern using compiled regex if byte_pattern is None: @@ -99,25 +102,36 @@ def parse_fs_usage_line_static(line, byte_pattern=None, collect_ops=False): op_type = "read" if is_read else "write" - if collect_ops: + # Extract file path (typically 4th column in fs_usage output) + path = parts[3] if len(parts) > 3 else "" + + if collect_detailed: + return { + "path": path, + "operation": op_type, + "syscall": syscall_raw, + "size_bytes": bytes_transferred, + } + elif collect_ops: return {"type": op_type, "bytes": bytes_transferred} return op_type, bytes_transferred - def parse_fs_usage_line(self, line, collect_ops=False): + def parse_fs_usage_line(self, line, collect_ops=False, collect_detailed=False): """Parse a single fs_usage output line for I/O operations (instance method) This is a convenience wrapper that uses the instance's compiled byte pattern. """ - return self.parse_fs_usage_line_static(line, self._fs_usage_byte_pattern, collect_ops) + return self.parse_fs_usage_line_static(line, self._fs_usage_byte_pattern, collect_ops, collect_detailed) @staticmethod - def parse_strace_line_static(line, strace_pattern, io_syscalls, collect_ops=False): + def parse_strace_line_static(line, strace_pattern, io_syscalls, collect_ops=False, collect_detailed=False): """Parse a single strace output line for I/O operations (static version) Example strace lines: 3385 write(3, "Hello World...", 1100) = 1100 3385 read(3, "data", 4096) = 133 3385 pread64(3, "...", 1024, 0) = 1024 + 3385 read(3, "data", 4096) = 133 # With -y flag Note: Lines with or <... resumed> are not matched as they don't contain complete result information in a single line. @@ -127,22 +141,24 @@ def parse_strace_line_static(line, strace_pattern, io_syscalls, collect_ops=Fals strace_pattern: Compiled regex pattern for strace output io_syscalls: Set of I/O syscall names to track collect_ops: If True, return full operation info for histogram collection + collect_detailed: If True, return detailed info including path and syscall Returns: If collect_ops is False: (op_type, bytes_transferred) If collect_ops is True: {'type': op_type, 'bytes': bytes_transferred} + If collect_detailed is True: {'path': str, 'operation': str, 'syscall': str, 'size_bytes': int} """ # Match patterns like: PID syscall(fd, ..., size) = result match = strace_pattern.match(line) if not match: - return None if collect_ops else (None, 0) + return None if (collect_ops or collect_detailed) else (None, 0) - pid, syscall, result = match.groups() - syscall = syscall.lower() + pid, syscall_raw, result = match.groups() + syscall = syscall_raw.lower() # Check if it's one of the I/O syscalls we're tracking if syscall not in io_syscalls: - return None if collect_ops else (None, 0) + return None if (collect_ops or collect_detailed) else (None, 0) # Determine if it's a read or write operation based on syscall name if "read" in syscall: @@ -150,25 +166,38 @@ def parse_strace_line_static(line, strace_pattern, io_syscalls, collect_ops=Fals elif "write" in syscall: is_read = False else: - return None if collect_ops else (None, 0) + return None if (collect_ops or collect_detailed) else (None, 0) # The return value is the number of bytes transferred (or -1 on error) bytes_transferred = int(result) if bytes_transferred < 0: - return None if collect_ops else (None, 0) + return None if (collect_ops or collect_detailed) else (None, 0) op_type = "read" if is_read else "write" - if collect_ops: + # Try to extract file path from fd with -y flag format: fd + path_match = re.search(r'\d+<([^>]+)>', line) + path = path_match.group(1) if path_match else "" + + if collect_detailed: + return { + "path": path, + "operation": op_type, + "syscall": syscall_raw, + "size_bytes": bytes_transferred, + } + elif collect_ops: return {"type": op_type, "bytes": bytes_transferred} return op_type, bytes_transferred - def parse_strace_line(self, line, collect_ops=False): + def parse_strace_line(self, line, collect_ops=False, collect_detailed=False): """Parse a single strace output line for I/O operations (instance method) This is a convenience wrapper that uses the instance's strace pattern and syscalls. """ - return self.parse_strace_line_static(line, self._strace_pattern, self._io_syscalls, collect_ops) + return self.parse_strace_line_static( + line, self._strace_pattern, self._io_syscalls, collect_ops, collect_detailed + ) @staticmethod def _create_helper_script(pid, output_file, control_file): @@ -226,12 +255,13 @@ def _launch_helper_via_osascript(self, helper_script_path): ) return proc - def measure_macos_osascript(self, code, collect_ops=False): + def measure_macos_osascript(self, code, collect_ops=False, collect_detailed=False): """Measure IOPS on macOS using fs_usage via osascript Args: code: The code to profile collect_ops: If True, collect individual operation sizes for histogram + collect_detailed: If True, collect detailed I/O data for DataFrame """ pid = os.getpid() @@ -296,11 +326,23 @@ def measure_macos_osascript(self, code, collect_ops=False): read_bytes = 0 write_bytes = 0 operations = [] if collect_ops else None + detailed_data = [] if collect_detailed else None if os.path.exists(output_file): with open(output_file, "r") as f: for line in f: - if collect_ops: + if collect_detailed: + detail = self.parse_fs_usage_line(line, collect_detailed=True) + if detail: + detailed_data.append(detail) + # Also update counts for regular metrics + if detail["operation"] == "read": + read_count += 1 + read_bytes += detail["size_bytes"] + elif detail["operation"] == "write": + write_count += 1 + write_bytes += detail["size_bytes"] + elif collect_ops: op = self.parse_fs_usage_line(line, collect_ops=True) if op: operations.append(op) @@ -331,6 +373,9 @@ def measure_macos_osascript(self, code, collect_ops=False): if collect_ops: result["operations"] = operations + if collect_detailed: + result["detailed_data"] = detailed_data + return result finally: @@ -355,12 +400,13 @@ def measure_macos_osascript(self, code, collect_ops=False): except OSError: pass # File already deleted or permission issue - def measure_linux_strace(self, code, collect_ops=False): + def measure_linux_strace(self, code, collect_ops=False, collect_detailed=False): """Measure IOPS on Linux using strace (no elevated privileges required) Args: code: The code to profile collect_ops: If True, collect individual operation sizes for histogram + collect_detailed: If True, collect detailed I/O data for DataFrame """ pid = os.getpid() @@ -385,6 +431,7 @@ def measure_linux_strace(self, code, collect_ops=False): strace_cmd = [ "strace", "-f", # Follow forks + "-y", # Print paths associated with file descriptor arguments "-e", f"trace={syscalls_to_trace}", "-o", @@ -432,12 +479,24 @@ def measure_linux_strace(self, code, collect_ops=False): read_bytes = 0 write_bytes = 0 operations = [] if collect_ops else None + detailed_data = [] if collect_detailed else None if os.path.exists(output_file): try: with open(output_file, "r", errors="ignore") as f: for line in f: - if collect_ops: + if collect_detailed: + detail = self.parse_strace_line(line, collect_detailed=True) + if detail: + detailed_data.append(detail) + # Also update counts for regular metrics + if detail["operation"] == "read": + read_count += 1 + read_bytes += detail["size_bytes"] + elif detail["operation"] == "write": + write_count += 1 + write_bytes += detail["size_bytes"] + elif collect_ops: op = self.parse_strace_line(line, collect_ops=True) if op: operations.append(op) @@ -470,6 +529,9 @@ def measure_linux_strace(self, code, collect_ops=False): if collect_ops: result["operations"] = operations + if collect_detailed: + result["detailed_data"] = detailed_data + return result finally: diff --git a/src/iops_profiler/magic.py b/src/iops_profiler/magic.py index 1970c3d..333a92d 100644 --- a/src/iops_profiler/magic.py +++ b/src/iops_profiler/magic.py @@ -41,13 +41,17 @@ def _profile_code(self, code, show_histogram=False): Returns: Dictionary with profiling results """ - # Determine if we should collect individual operations + # Determine if we should collect individual operations or detailed data collect_ops = show_histogram + # Always collect detailed data for the iops_detailed_data variable + collect_detailed = True # Determine measurement method based on platform if self.platform == "darwin": # macOS try: - results = self.collector.measure_macos_osascript(code, collect_ops=collect_ops) + results = self.collector.measure_macos_osascript( + code, collect_ops=collect_ops, collect_detailed=collect_detailed + ) except RuntimeError as e: if "Resource busy" in str(e): print("⚠️ ktrace is busy. Falling back to system-wide measurement.") @@ -65,7 +69,9 @@ def _profile_code(self, code, show_histogram=False): elif self.platform in ("linux", "linux2"): # Use strace on Linux (no elevated privileges required) try: - results = self.collector.measure_linux_strace(code, collect_ops=collect_ops) + results = self.collector.measure_linux_strace( + code, collect_ops=collect_ops, collect_detailed=collect_detailed + ) except (RuntimeError, FileNotFoundError) as e: print(f"⚠️ Could not use strace: {e}") print("Falling back to psutil per-process measurement.\n") @@ -141,6 +147,23 @@ def iops(self, line, cell=None): if show_histogram and "operations" in results: display.generate_histograms(results["operations"]) + # Inject detailed I/O data into user namespace + if "detailed_data" in results and results["detailed_data"]: + # Convert detailed data list to DataFrame + try: + import pandas as pd + + df = pd.DataFrame(results["detailed_data"]) + self.shell.user_ns["iops_detailed_data"] = df + except ImportError: + # pandas not available - store raw data + self.shell.user_ns["iops_detailed_data"] = results["detailed_data"] + else: + # No detailed data available (psutil mode or fallback) + self.shell.user_ns[ + "iops_detailed_data" + ] = "Detailed I/O data not available: profiling uses psutil mode which only provides aggregate metrics" + except Exception as e: print(f"❌ Error during IOPS profiling: {e}") print("\nYour code was not executed. Please fix the profiling issue and try again.") From 24296e8cfc4cadd5629c2cbf881efb0e0d5e107f Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Thu, 29 Jan 2026 21:47:29 +0000 Subject: [PATCH 3/4] Add tests and documentation for detailed I/O data feature - Added comprehensive tests for detailed data parsing and collection - Updated README.md with usage examples for iops_detailed_data - Updated docs/user_guide.rst with detailed documentation - Fixed linter issues (line length, import order) Co-authored-by: mtauraso <31012+mtauraso@users.noreply.github.com> --- README.md | 26 ++++ docs/user_guide.rst | 60 +++++++++ src/iops_profiler/collector.py | 8 +- src/iops_profiler/magic.py | 8 +- tests/test_detailed_data.py | 223 +++++++++++++++++++++++++++++++++ 5 files changed, 320 insertions(+), 5 deletions(-) create mode 100644 tests/test_detailed_data.py diff --git a/README.md b/README.md index 55441f2..9203232 100644 --- a/README.md +++ b/README.md @@ -90,6 +90,32 @@ The extension will display a table showing: - IOPS (operations per second) - Throughput (bytes per second) +### Accessing Detailed I/O Data + +After running `%%iops`, you can access detailed I/O operation data via the `iops_detailed_data` variable: + +```python +%%iops +with open('test.txt', 'w') as f: + f.write('data') +``` + +In the next cell: +```python +# Access the detailed I/O data +iops_detailed_data # Returns a pandas DataFrame or a message +``` + +**When detailed data is available** (Linux with strace, macOS with fs_usage): +- `iops_detailed_data` is a pandas DataFrame with columns: + - `path` (str): File path accessed + - `operation` (str): "read" or "write" + - `syscall` (str): Syscall name (e.g., "read", "write", "pread64") + - `size_bytes` (int): Bytes transferred in the operation + +**When detailed data is NOT available** (Windows, or fallback modes): +- `iops_detailed_data` is a string message explaining that detailed data is not available in the current profiling mode + ### Example Notebooks Check out our example notebooks for hands-on learning: diff --git a/docs/user_guide.rst b/docs/user_guide.rst index 4426146..44b2b41 100644 --- a/docs/user_guide.rst +++ b/docs/user_guide.rst @@ -84,6 +84,66 @@ Performance Metrics Advanced Features ----------------- +Accessing Detailed I/O Data +~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +After running ``%%iops``, you can access detailed information about each I/O operation via the ``iops_detailed_data`` variable that is automatically injected into your namespace. + +**Basic Usage:** + +.. code-block:: python + + %%iops + with open('test.txt', 'w') as f: + f.write('data') + +In the next cell, you can access the detailed data: + +.. code-block:: python + + # Access the detailed I/O data + iops_detailed_data + +**When detailed data is available** (Linux with strace, macOS with fs_usage): + +``iops_detailed_data`` is a pandas DataFrame with the following columns: + +- ``path`` (str): File path accessed during the I/O operation +- ``operation`` (str): Type of operation - either "read" or "write" +- ``syscall`` (str): The specific system call used (e.g., "read", "write", "pread64", "writev") +- ``size_bytes`` (int): Number of bytes transferred in the operation + +**Example DataFrame:** + +.. code-block:: python + + # Example output + path operation syscall size_bytes + 0 /tmp/test.txt write write 1024 + 1 /tmp/test.txt read read 1024 + 2 /tmp/data.bin write pwrite64 4096 + +**When detailed data is NOT available** (Windows, or fallback modes like psutil): + +``iops_detailed_data`` is a string message: + +.. code-block:: text + + "Detailed I/O data not available: profiling uses psutil mode which only provides aggregate metrics" + +This happens when: + +- Running on Windows (psutil mode by default) +- Fallback to psutil mode on Linux (when strace is not available) +- Fallback to system-wide measurement on macOS (when fs_usage fails) + +**Use Cases for Detailed Data:** + +1. **Identifying hot files**: Find which files are accessed most frequently +2. **Analyzing I/O patterns**: See the distribution of read vs write operations per file +3. **Debugging performance issues**: Identify unexpected I/O to specific files +4. **Optimizing buffer sizes**: Examine the ``size_bytes`` distribution to tune your I/O strategy + Histogram Visualization ~~~~~~~~~~~~~~~~~~~~~~~ diff --git a/src/iops_profiler/collector.py b/src/iops_profiler/collector.py index 1715760..91edd40 100644 --- a/src/iops_profiler/collector.py +++ b/src/iops_profiler/collector.py @@ -121,10 +121,14 @@ def parse_fs_usage_line(self, line, collect_ops=False, collect_detailed=False): This is a convenience wrapper that uses the instance's compiled byte pattern. """ - return self.parse_fs_usage_line_static(line, self._fs_usage_byte_pattern, collect_ops, collect_detailed) + return self.parse_fs_usage_line_static( + line, self._fs_usage_byte_pattern, collect_ops, collect_detailed + ) @staticmethod - def parse_strace_line_static(line, strace_pattern, io_syscalls, collect_ops=False, collect_detailed=False): + def parse_strace_line_static( + line, strace_pattern, io_syscalls, collect_ops=False, collect_detailed=False + ): """Parse a single strace output line for I/O operations (static version) Example strace lines: diff --git a/src/iops_profiler/magic.py b/src/iops_profiler/magic.py index 333a92d..eaad660 100644 --- a/src/iops_profiler/magic.py +++ b/src/iops_profiler/magic.py @@ -160,9 +160,11 @@ def iops(self, line, cell=None): self.shell.user_ns["iops_detailed_data"] = results["detailed_data"] else: # No detailed data available (psutil mode or fallback) - self.shell.user_ns[ - "iops_detailed_data" - ] = "Detailed I/O data not available: profiling uses psutil mode which only provides aggregate metrics" + message = ( + "Detailed I/O data not available: profiling uses psutil mode " + "which only provides aggregate metrics" + ) + self.shell.user_ns["iops_detailed_data"] = message except Exception as e: print(f"❌ Error during IOPS profiling: {e}") diff --git a/tests/test_detailed_data.py b/tests/test_detailed_data.py new file mode 100644 index 0000000..93ad9ef --- /dev/null +++ b/tests/test_detailed_data.py @@ -0,0 +1,223 @@ +""" +Tests for detailed I/O data collection and injection into user namespace. + +This module tests the new detailed data collection feature that provides +file paths, syscalls, and operation details via the iops_detailed_data variable. +""" + +from unittest.mock import MagicMock + +import pytest + +from iops_profiler import collector +from iops_profiler.magic import IOPSProfiler + + +def create_test_profiler(): + """Helper function to create a test profiler instance""" + mock_shell = MagicMock() + mock_shell.configurables = [] + mock_shell.user_ns = {} # Mock user namespace + profiler = IOPSProfiler.__new__(IOPSProfiler) + profiler.shell = mock_shell + # Initialize the profiler attributes manually to avoid traitlets + import sys + + profiler.platform = sys.platform + # Initialize the collector with the mock shell + from iops_profiler.collector import Collector + + profiler.collector = Collector(mock_shell) + return profiler + + +class TestDetailedDataParsing: + """Test cases for detailed data parsing""" + + @pytest.fixture + def profiler(self): + """Create an IOPSProfiler instance with a mock shell""" + return create_test_profiler() + + def test_strace_detailed_with_path(self, profiler): + """Test parsing strace line with -y flag (file path included)""" + line = '3385 read(3, "data", 4096) = 133' + result = profiler.collector.parse_strace_line(line, collect_detailed=True) + + assert result is not None + assert isinstance(result, dict) + assert result["path"] == "/tmp/test.txt" + assert result["operation"] == "read" + assert result["syscall"] == "read" + assert result["size_bytes"] == 133 + + def test_strace_detailed_without_path(self, profiler): + """Test parsing strace line without -y flag (no file path)""" + line = '3385 write(3, "Hello World...", 1100) = 1100' + result = profiler.collector.parse_strace_line(line, collect_detailed=True) + + assert result is not None + assert isinstance(result, dict) + assert result["path"] == "" # No path available + assert result["operation"] == "write" + assert result["syscall"] == "write" + assert result["size_bytes"] == 1100 + + def test_strace_detailed_pread64_with_path(self, profiler): + """Test parsing pread64 operation with path""" + line = '3385 pread64(3, "...", 1024, 0) = 1024' + result = profiler.collector.parse_strace_line(line, collect_detailed=True) + + assert result is not None + assert result["path"] == "/var/log/test.log" + assert result["operation"] == "read" + assert result["syscall"] == "pread64" + assert result["size_bytes"] == 1024 + + def test_strace_detailed_error_returns_none(self, profiler): + """Test parsing error operation returns None""" + line = "3385 read(3, 0x..., 4096) = -1 EBADF" + result = profiler.collector.parse_strace_line(line, collect_detailed=True) + + assert result is None + + def test_fs_usage_detailed_basic(self, profiler): + """Test parsing fs_usage line with detailed collection""" + line = "12:34:56 read B=0x1000 /path/to/file.txt Python" + result = profiler.collector.parse_fs_usage_line(line, collect_detailed=True) + + assert result is not None + assert isinstance(result, dict) + assert result["path"] == "/path/to/file.txt" + assert result["operation"] == "read" + assert result["syscall"] == "read" + assert result["size_bytes"] == 0x1000 + + def test_fs_usage_detailed_write(self, profiler): + """Test parsing fs_usage write operation""" + line = "12:34:57 write B=0x800 /tmp/output.dat Python" + result = profiler.collector.parse_fs_usage_line(line, collect_detailed=True) + + assert result is not None + assert result["path"] == "/tmp/output.dat" + assert result["operation"] == "write" + assert result["syscall"] == "write" + assert result["size_bytes"] == 0x800 + + def test_fs_usage_detailed_pread(self, profiler): + """Test parsing fs_usage pread operation""" + line = "12:34:58 pread B=0x400 /data/file.bin Python" + result = profiler.collector.parse_fs_usage_line(line, collect_detailed=True) + + assert result is not None + assert result["path"] == "/data/file.bin" + assert result["operation"] == "read" + assert result["syscall"] == "pread" + assert result["size_bytes"] == 0x400 + + def test_fs_usage_detailed_non_io_returns_none(self, profiler): + """Test parsing non-I/O operation returns None""" + line = "12:34:59 open B=0x1000 /path/to/file Python" + result = profiler.collector.parse_fs_usage_line(line, collect_detailed=True) + + assert result is None + + +class TestDetailedDataCollection: + """Test cases for detailed data collection in measurement methods""" + + @pytest.fixture + def profiler(self): + """Create an IOPSProfiler instance with a mock shell""" + return create_test_profiler() + + def test_detailed_data_keys_in_result(self, profiler): + """Test that detailed_data key is added when collect_detailed=True""" + # We can't fully test the measurement methods without actual I/O + # but we can test the parsing logic + strace_lines = [ + '3385 read(3, "data", 100) = 100', + '3385 write(4, "info", 200) = 200', + ] + + detailed_data = [] + for line in strace_lines: + detail = profiler.collector.parse_strace_line(line, collect_detailed=True) + if detail: + detailed_data.append(detail) + + assert len(detailed_data) == 2 + assert detailed_data[0]["path"] == "/tmp/test1.txt" + assert detailed_data[0]["operation"] == "read" + assert detailed_data[0]["size_bytes"] == 100 + assert detailed_data[1]["path"] == "/tmp/test2.txt" + assert detailed_data[1]["operation"] == "write" + assert detailed_data[1]["size_bytes"] == 200 + + +class TestDetailedDataBackwardCompatibility: + """Test backward compatibility of parsing functions""" + + @pytest.fixture + def profiler(self): + """Create an IOPSProfiler instance with a mock shell""" + return create_test_profiler() + + def test_strace_parsing_without_collect_detailed(self, profiler): + """Test that original parsing still works without collect_detailed""" + line = '3385 read(3, "data", 4096) = 133' + + # Test with collect_ops=False (default) + op_type, bytes_transferred = profiler.collector.parse_strace_line(line) + assert op_type == "read" + assert bytes_transferred == 133 + + # Test with collect_ops=True + result = profiler.collector.parse_strace_line(line, collect_ops=True) + assert result["type"] == "read" + assert result["bytes"] == 133 + + def test_fs_usage_parsing_without_collect_detailed(self, profiler): + """Test that original fs_usage parsing still works""" + line = "12:34:56 read B=0x1000 /path/to/file Python" + + # Test with default mode + op_type, bytes_transferred = profiler.collector.parse_fs_usage_line(line) + assert op_type == "read" + assert bytes_transferred == 0x1000 + + # Test with collect_ops=True + result = profiler.collector.parse_fs_usage_line(line, collect_ops=True) + assert result["type"] == "read" + assert result["bytes"] == 0x1000 + + +class TestModuleLevelFunctionsWithDetailed: + """Test module-level backward compatibility functions with detailed mode""" + + def test_module_level_parse_strace_line_with_detailed(self): + """Test module-level parse_strace_line function with collect_detailed""" + import re + + from iops_profiler.collector import STRACE_IO_SYSCALLS + + strace_pattern = re.compile(r"^\s*(\d+)\s+(\w+)\([^)]+\)\s*=\s*(-?\d+)") + io_syscalls = set(STRACE_IO_SYSCALLS) + + line = '3385 read(3, "data", 4096) = 133' + result = collector.parse_strace_line(line, strace_pattern, io_syscalls, collect_detailed=True) + + assert result is not None + assert result["path"] == "/tmp/test.txt" + assert result["operation"] == "read" + assert result["size_bytes"] == 133 + + def test_module_level_parse_fs_usage_line_with_detailed(self): + """Test module-level parse_fs_usage_line function with collect_detailed""" + line = "12:34:56 read B=0x1000 /path/to/file.txt Python" + result = collector.parse_fs_usage_line(line, collect_detailed=True) + + assert result is not None + assert result["path"] == "/path/to/file.txt" + assert result["operation"] == "read" + assert result["size_bytes"] == 0x1000 From fa9b92789013fea24cfb401c0cecc4eb6571f320 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Thu, 29 Jan 2026 21:51:44 +0000 Subject: [PATCH 4/4] Fix mutual exclusivity bug and add documentation warnings - Fixed bug where collect_ops and collect_detailed were mutually exclusive - Both histogram and detailed data can now be collected simultaneously - Added warning about iops_detailed_data variable name collision - Improved code structure to handle both flags independently - All 133 tests still pass Co-authored-by: mtauraso <31012+mtauraso@users.noreply.github.com> --- README.md | 2 ++ src/iops_profiler/collector.py | 46 ++++++++++++++++++++++------------ 2 files changed, 32 insertions(+), 16 deletions(-) diff --git a/README.md b/README.md index 9203232..3dd919f 100644 --- a/README.md +++ b/README.md @@ -106,6 +106,8 @@ In the next cell: iops_detailed_data # Returns a pandas DataFrame or a message ``` +> **Note:** The `iops_detailed_data` variable is automatically created/updated in your namespace after each `%%iops` execution. Avoid using this variable name in your own code to prevent conflicts. + **When detailed data is available** (Linux with strace, macOS with fs_usage): - `iops_detailed_data` is a pandas DataFrame with columns: - `path` (str): File path accessed diff --git a/src/iops_profiler/collector.py b/src/iops_profiler/collector.py index 91edd40..4f62433 100644 --- a/src/iops_profiler/collector.py +++ b/src/iops_profiler/collector.py @@ -335,6 +335,7 @@ def measure_macos_osascript(self, code, collect_ops=False, collect_detailed=Fals if os.path.exists(output_file): with open(output_file, "r") as f: for line in f: + # Collect detailed data if requested if collect_detailed: detail = self.parse_fs_usage_line(line, collect_detailed=True) if detail: @@ -346,17 +347,23 @@ def measure_macos_osascript(self, code, collect_ops=False, collect_detailed=Fals elif detail["operation"] == "write": write_count += 1 write_bytes += detail["size_bytes"] - elif collect_ops: + + # Collect histogram operations if requested (can happen alongside detailed) + if collect_ops: op = self.parse_fs_usage_line(line, collect_ops=True) if op: operations.append(op) - if op["type"] == "read": - read_count += 1 - read_bytes += op["bytes"] - elif op["type"] == "write": - write_count += 1 - write_bytes += op["bytes"] - else: + # Only update counts if we're not already tracking via detailed + if not collect_detailed: + if op["type"] == "read": + read_count += 1 + read_bytes += op["bytes"] + elif op["type"] == "write": + write_count += 1 + write_bytes += op["bytes"] + + # Fallback: neither detailed nor ops collection + if not collect_detailed and not collect_ops: op_type, bytes_transferred = self.parse_fs_usage_line(line) if op_type == "read": read_count += 1 @@ -489,6 +496,7 @@ def measure_linux_strace(self, code, collect_ops=False, collect_detailed=False): try: with open(output_file, "r", errors="ignore") as f: for line in f: + # Collect detailed data if requested if collect_detailed: detail = self.parse_strace_line(line, collect_detailed=True) if detail: @@ -500,17 +508,23 @@ def measure_linux_strace(self, code, collect_ops=False, collect_detailed=False): elif detail["operation"] == "write": write_count += 1 write_bytes += detail["size_bytes"] - elif collect_ops: + + # Collect histogram operations if requested (can happen alongside detailed) + if collect_ops: op = self.parse_strace_line(line, collect_ops=True) if op: operations.append(op) - if op["type"] == "read": - read_count += 1 - read_bytes += op["bytes"] - elif op["type"] == "write": - write_count += 1 - write_bytes += op["bytes"] - else: + # Only update counts if we're not already tracking via detailed + if not collect_detailed: + if op["type"] == "read": + read_count += 1 + read_bytes += op["bytes"] + elif op["type"] == "write": + write_count += 1 + write_bytes += op["bytes"] + + # Fallback: neither detailed nor ops collection + if not collect_detailed and not collect_ops: op_type, bytes_transferred = self.parse_strace_line(line) if op_type == "read": read_count += 1