From 00b8a88e974c7bf5c01df3a4ee3963715d7f28ab Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Thu, 29 Jan 2026 21:40:49 +0000
Subject: [PATCH 1/4] Initial plan


From d5a9a2232c7d1353536a882051618a9592baca11 Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Thu, 29 Jan 2026 21:44:44 +0000
Subject: [PATCH 2/4] Add detailed I/O data collection and injection into user
 namespace

- Added pandas as dependency
- Enhanced parse_strace_line and parse_fs_usage_line to collect detailed data (path, operation, syscall, size_bytes)
- Added -y flag to strace for file path extraction
- Modified measure_macos_osascript and measure_linux_strace to collect detailed data
- Updated magic.py to inject iops_detailed_data variable into user namespace
  - DataFrame when detailed data available
  - String message when using psutil mode

Co-authored-by: mtauraso <31012+mtauraso@users.noreply.github.com>
---
 pyproject.toml                 |   1 +
 src/iops_profiler/collector.py | 104 ++++++++++++++++++++++++++-------
 src/iops_profiler/magic.py     |  29 ++++++++-
 3 files changed, 110 insertions(+), 24 deletions(-)

diff --git a/pyproject.toml b/pyproject.toml
index 28392e1..8c88f09 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -28,6 +28,7 @@ dependencies = [
     "psutil",
     "matplotlib",
     "numpy",
+    "pandas",
 ]
 
 [project.urls]
diff --git a/src/iops_profiler/collector.py b/src/iops_profiler/collector.py
index 47d8348..1715760 100644
--- a/src/iops_profiler/collector.py
+++ b/src/iops_profiler/collector.py
@@ -66,28 +66,31 @@ def __init__(self, shell):
         self._io_syscalls = set(STRACE_IO_SYSCALLS)
 
     @staticmethod
-    def parse_fs_usage_line_static(line, byte_pattern=None, collect_ops=False):
+    def parse_fs_usage_line_static(line, byte_pattern=None, collect_ops=False, collect_detailed=False):
         """Parse a single fs_usage output line for I/O operations (static version)
 
         Args:
             line: The line to parse
             byte_pattern: Compiled regex pattern for extracting byte count (optional)
             collect_ops: If True, return full operation info for histogram collection
+            collect_detailed: If True, return detailed info including path and syscall
 
         Returns:
             If collect_ops is False: (op_type, bytes_transferred)
             If collect_ops is True: {'type': op_type, 'bytes': bytes_transferred}
+            If collect_detailed is True: {'path': str, 'operation': str, 'syscall': str, 'size_bytes': int}
         """
         parts = line.split()
         if len(parts) < 2:
-            return None if collect_ops else (None, 0)
+            return None if (collect_ops or collect_detailed) else (None, 0)
 
-        syscall = parts[1].lower()
+        syscall_raw = parts[1]
+        syscall = syscall_raw.lower()
         is_read = "read" in syscall
         is_write = "write" in syscall
 
         if not (is_read or is_write):
-            return None if collect_ops else (None, 0)
+            return None if (collect_ops or collect_detailed) else (None, 0)
 
         # Extract byte count from B=0x[hex] pattern using compiled regex
         if byte_pattern is None:
@@ -99,25 +102,36 @@ def parse_fs_usage_line_static(line, byte_pattern=None, collect_ops=False):
 
         op_type = "read" if is_read else "write"
 
-        if collect_ops:
+        # Extract file path (typically 4th column in fs_usage output)
+        path = parts[3] if len(parts) > 3 else ""
+
+        if collect_detailed:
+            return {
+                "path": path,
+                "operation": op_type,
+                "syscall": syscall_raw,
+                "size_bytes": bytes_transferred,
+            }
+        elif collect_ops:
             return {"type": op_type, "bytes": bytes_transferred}
         return op_type, bytes_transferred
 
-    def parse_fs_usage_line(self, line, collect_ops=False):
+    def parse_fs_usage_line(self, line, collect_ops=False, collect_detailed=False):
         """Parse a single fs_usage output line for I/O operations (instance method)
 
         This is a convenience wrapper that uses the instance's compiled byte pattern.
         """
-        return self.parse_fs_usage_line_static(line, self._fs_usage_byte_pattern, collect_ops)
+        return self.parse_fs_usage_line_static(line, self._fs_usage_byte_pattern, collect_ops, collect_detailed)
 
     @staticmethod
-    def parse_strace_line_static(line, strace_pattern, io_syscalls, collect_ops=False):
+    def parse_strace_line_static(line, strace_pattern, io_syscalls, collect_ops=False, collect_detailed=False):
         """Parse a single strace output line for I/O operations (static version)
 
         Example strace lines:
         3385  write(3, "Hello World...", 1100) = 1100
         3385  read(3, "data", 4096) = 133
         3385  pread64(3, "...", 1024, 0) = 1024
+        3385  read(3</tmp/test.txt>, "data", 4096) = 133  # With -y flag
 
         Note: Lines with <unfinished ...> or <... resumed> are not matched
         as they don't contain complete result information in a single line.
@@ -127,22 +141,24 @@ def parse_strace_line_static(line, strace_pattern, io_syscalls, collect_ops=Fals
             strace_pattern: Compiled regex pattern for strace output
             io_syscalls: Set of I/O syscall names to track
             collect_ops: If True, return full operation info for histogram collection
+            collect_detailed: If True, return detailed info including path and syscall
 
         Returns:
             If collect_ops is False: (op_type, bytes_transferred)
             If collect_ops is True: {'type': op_type, 'bytes': bytes_transferred}
+            If collect_detailed is True: {'path': str, 'operation': str, 'syscall': str, 'size_bytes': int}
         """
         # Match patterns like: PID syscall(fd, ..., size) = result
         match = strace_pattern.match(line)
         if not match:
-            return None if collect_ops else (None, 0)
+            return None if (collect_ops or collect_detailed) else (None, 0)
 
-        pid, syscall, result = match.groups()
-        syscall = syscall.lower()
+        pid, syscall_raw, result = match.groups()
+        syscall = syscall_raw.lower()
 
         # Check if it's one of the I/O syscalls we're tracking
         if syscall not in io_syscalls:
-            return None if collect_ops else (None, 0)
+            return None if (collect_ops or collect_detailed) else (None, 0)
 
         # Determine if it's a read or write operation based on syscall name
         if "read" in syscall:
@@ -150,25 +166,38 @@ def parse_strace_line_static(line, strace_pattern, io_syscalls, collect_ops=Fals
         elif "write" in syscall:
             is_read = False
         else:
-            return None if collect_ops else (None, 0)
+            return None if (collect_ops or collect_detailed) else (None, 0)
 
         # The return value is the number of bytes transferred (or -1 on error)
         bytes_transferred = int(result)
         if bytes_transferred < 0:
-            return None if collect_ops else (None, 0)
+            return None if (collect_ops or collect_detailed) else (None, 0)
 
         op_type = "read" if is_read else "write"
 
-        if collect_ops:
+        # Try to extract file path from fd with -y flag format: fd</path/to/file>
+        path_match = re.search(r'\d+<([^>]+)>', line)
+        path = path_match.group(1) if path_match else ""
+
+        if collect_detailed:
+            return {
+                "path": path,
+                "operation": op_type,
+                "syscall": syscall_raw,
+                "size_bytes": bytes_transferred,
+            }
+        elif collect_ops:
             return {"type": op_type, "bytes": bytes_transferred}
         return op_type, bytes_transferred
 
-    def parse_strace_line(self, line, collect_ops=False):
+    def parse_strace_line(self, line, collect_ops=False, collect_detailed=False):
         """Parse a single strace output line for I/O operations (instance method)
 
         This is a convenience wrapper that uses the instance's strace pattern and syscalls.
         """
-        return self.parse_strace_line_static(line, self._strace_pattern, self._io_syscalls, collect_ops)
+        return self.parse_strace_line_static(
+            line, self._strace_pattern, self._io_syscalls, collect_ops, collect_detailed
+        )
 
     @staticmethod
     def _create_helper_script(pid, output_file, control_file):
@@ -226,12 +255,13 @@ def _launch_helper_via_osascript(self, helper_script_path):
         )
         return proc
 
-    def measure_macos_osascript(self, code, collect_ops=False):
+    def measure_macos_osascript(self, code, collect_ops=False, collect_detailed=False):
         """Measure IOPS on macOS using fs_usage via osascript
 
         Args:
             code: The code to profile
             collect_ops: If True, collect individual operation sizes for histogram
+            collect_detailed: If True, collect detailed I/O data for DataFrame
         """
         pid = os.getpid()
 
@@ -296,11 +326,23 @@ def measure_macos_osascript(self, code, collect_ops=False):
             read_bytes = 0
             write_bytes = 0
             operations = [] if collect_ops else None
+            detailed_data = [] if collect_detailed else None
 
             if os.path.exists(output_file):
                 with open(output_file, "r") as f:
                     for line in f:
-                        if collect_ops:
+                        if collect_detailed:
+                            detail = self.parse_fs_usage_line(line, collect_detailed=True)
+                            if detail:
+                                detailed_data.append(detail)
+                                # Also update counts for regular metrics
+                                if detail["operation"] == "read":
+                                    read_count += 1
+                                    read_bytes += detail["size_bytes"]
+                                elif detail["operation"] == "write":
+                                    write_count += 1
+                                    write_bytes += detail["size_bytes"]
+                        elif collect_ops:
                             op = self.parse_fs_usage_line(line, collect_ops=True)
                             if op:
                                 operations.append(op)
@@ -331,6 +373,9 @@ def measure_macos_osascript(self, code, collect_ops=False):
             if collect_ops:
                 result["operations"] = operations
 
+            if collect_detailed:
+                result["detailed_data"] = detailed_data
+
             return result
 
         finally:
@@ -355,12 +400,13 @@ def measure_macos_osascript(self, code, collect_ops=False):
                 except OSError:
                     pass  # File already deleted or permission issue
 
-    def measure_linux_strace(self, code, collect_ops=False):
+    def measure_linux_strace(self, code, collect_ops=False, collect_detailed=False):
         """Measure IOPS on Linux using strace (no elevated privileges required)
 
         Args:
             code: The code to profile
             collect_ops: If True, collect individual operation sizes for histogram
+            collect_detailed: If True, collect detailed I/O data for DataFrame
         """
         pid = os.getpid()
 
@@ -385,6 +431,7 @@ def measure_linux_strace(self, code, collect_ops=False):
             strace_cmd = [
                 "strace",
                 "-f",  # Follow forks
+                "-y",  # Print paths associated with file descriptor arguments
                 "-e",
                 f"trace={syscalls_to_trace}",
                 "-o",
@@ -432,12 +479,24 @@ def measure_linux_strace(self, code, collect_ops=False):
             read_bytes = 0
             write_bytes = 0
             operations = [] if collect_ops else None
+            detailed_data = [] if collect_detailed else None
 
             if os.path.exists(output_file):
                 try:
                     with open(output_file, "r", errors="ignore") as f:
                         for line in f:
-                            if collect_ops:
+                            if collect_detailed:
+                                detail = self.parse_strace_line(line, collect_detailed=True)
+                                if detail:
+                                    detailed_data.append(detail)
+                                    # Also update counts for regular metrics
+                                    if detail["operation"] == "read":
+                                        read_count += 1
+                                        read_bytes += detail["size_bytes"]
+                                    elif detail["operation"] == "write":
+                                        write_count += 1
+                                        write_bytes += detail["size_bytes"]
+                            elif collect_ops:
                                 op = self.parse_strace_line(line, collect_ops=True)
                                 if op:
                                     operations.append(op)
@@ -470,6 +529,9 @@ def measure_linux_strace(self, code, collect_ops=False):
             if collect_ops:
                 result["operations"] = operations
 
+            if collect_detailed:
+                result["detailed_data"] = detailed_data
+
             return result
 
         finally:
diff --git a/src/iops_profiler/magic.py b/src/iops_profiler/magic.py
index 1970c3d..333a92d 100644
--- a/src/iops_profiler/magic.py
+++ b/src/iops_profiler/magic.py
@@ -41,13 +41,17 @@ def _profile_code(self, code, show_histogram=False):
         Returns:
             Dictionary with profiling results
         """
-        # Determine if we should collect individual operations
+        # Determine if we should collect individual operations or detailed data
         collect_ops = show_histogram
+        # Always collect detailed data for the iops_detailed_data variable
+        collect_detailed = True
 
         # Determine measurement method based on platform
         if self.platform == "darwin":  # macOS
             try:
-                results = self.collector.measure_macos_osascript(code, collect_ops=collect_ops)
+                results = self.collector.measure_macos_osascript(
+                    code, collect_ops=collect_ops, collect_detailed=collect_detailed
+                )
             except RuntimeError as e:
                 if "Resource busy" in str(e):
                     print("⚠️ ktrace is busy. Falling back to system-wide measurement.")
@@ -65,7 +69,9 @@ def _profile_code(self, code, show_histogram=False):
         elif self.platform in ("linux", "linux2"):
             # Use strace on Linux (no elevated privileges required)
             try:
-                results = self.collector.measure_linux_strace(code, collect_ops=collect_ops)
+                results = self.collector.measure_linux_strace(
+                    code, collect_ops=collect_ops, collect_detailed=collect_detailed
+                )
             except (RuntimeError, FileNotFoundError) as e:
                 print(f"⚠️ Could not use strace: {e}")
                 print("Falling back to psutil per-process measurement.\n")
@@ -141,6 +147,23 @@ def iops(self, line, cell=None):
             if show_histogram and "operations" in results:
                 display.generate_histograms(results["operations"])
 
+            # Inject detailed I/O data into user namespace
+            if "detailed_data" in results and results["detailed_data"]:
+                # Convert detailed data list to DataFrame
+                try:
+                    import pandas as pd
+
+                    df = pd.DataFrame(results["detailed_data"])
+                    self.shell.user_ns["iops_detailed_data"] = df
+                except ImportError:
+                    # pandas not available - store raw data
+                    self.shell.user_ns["iops_detailed_data"] = results["detailed_data"]
+            else:
+                # No detailed data available (psutil mode or fallback)
+                self.shell.user_ns[
+                    "iops_detailed_data"
+                ] = "Detailed I/O data not available: profiling uses psutil mode which only provides aggregate metrics"
+
         except Exception as e:
             print(f"❌ Error during IOPS profiling: {e}")
             print("\nYour code was not executed. Please fix the profiling issue and try again.")

From 24296e8cfc4cadd5629c2cbf881efb0e0d5e107f Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Thu, 29 Jan 2026 21:47:29 +0000
Subject: [PATCH 3/4] Add tests and documentation for detailed I/O data feature

- Added comprehensive tests for detailed data parsing and collection
- Updated README.md with usage examples for iops_detailed_data
- Updated docs/user_guide.rst with detailed documentation
- Fixed linter issues (line length, import order)

Co-authored-by: mtauraso <31012+mtauraso@users.noreply.github.com>
---
 README.md                      |  26 ++++
 docs/user_guide.rst            |  60 +++++++++
 src/iops_profiler/collector.py |   8 +-
 src/iops_profiler/magic.py     |   8 +-
 tests/test_detailed_data.py    | 223 +++++++++++++++++++++++++++++++++
 5 files changed, 320 insertions(+), 5 deletions(-)
 create mode 100644 tests/test_detailed_data.py

diff --git a/README.md b/README.md
index 55441f2..9203232 100644
--- a/README.md
+++ b/README.md
@@ -90,6 +90,32 @@ The extension will display a table showing:
 - IOPS (operations per second)
 - Throughput (bytes per second)
 
+### Accessing Detailed I/O Data
+
+After running `%%iops`, you can access detailed I/O operation data via the `iops_detailed_data` variable:
+
+```python
+%%iops
+with open('test.txt', 'w') as f:
+    f.write('data')
+```
+
+In the next cell:
+```python
+# Access the detailed I/O data
+iops_detailed_data  # Returns a pandas DataFrame or a message
+```
+
+**When detailed data is available** (Linux with strace, macOS with fs_usage):
+- `iops_detailed_data` is a pandas DataFrame with columns:
+  - `path` (str): File path accessed
+  - `operation` (str): "read" or "write"
+  - `syscall` (str): Syscall name (e.g., "read", "write", "pread64")
+  - `size_bytes` (int): Bytes transferred in the operation
+
+**When detailed data is NOT available** (Windows, fallback modes, or runs in which no read/write operations were captured):
+- `iops_detailed_data` is a string message explaining that detailed data is not available in the current profiling mode
+
 ### Example Notebooks
 
 Check out our example notebooks for hands-on learning:
diff --git a/docs/user_guide.rst b/docs/user_guide.rst
index 4426146..44b2b41 100644
--- a/docs/user_guide.rst
+++ b/docs/user_guide.rst
@@ -84,6 +84,66 @@ Performance Metrics
 Advanced Features
 -----------------
 
+Accessing Detailed I/O Data
+~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+After running ``%%iops``, you can access detailed information about each I/O operation via the ``iops_detailed_data`` variable that is automatically injected into your namespace.
+
+**Basic Usage:**
+
+.. code-block:: python
+
+   %%iops
+   with open('test.txt', 'w') as f:
+       f.write('data')
+
+In the next cell, you can access the detailed data:
+
+.. code-block:: python
+
+   # Access the detailed I/O data
+   iops_detailed_data
+
+**When detailed data is available** (Linux with strace, macOS with fs_usage):
+
+``iops_detailed_data`` is a pandas DataFrame with the following columns:
+
+- ``path`` (str): File path accessed during the I/O operation
+- ``operation`` (str): Type of operation - either "read" or "write"
+- ``syscall`` (str): The specific system call used (e.g., "read", "write", "pread64", "writev")
+- ``size_bytes`` (int): Number of bytes transferred in the operation
+
+**Example DataFrame:**
+
+.. code-block:: python
+
+   # Example output
+               path operation   syscall  size_bytes
+   0  /tmp/test.txt     write     write        1024
+   1  /tmp/test.txt      read      read        1024
+   2  /tmp/data.bin     write  pwrite64        4096
+
+**When detailed data is NOT available** (Windows, fallback modes like psutil, or runs in which no read/write operations were captured):
+
+``iops_detailed_data`` is a string message:
+
+.. code-block:: text
+
+   "Detailed I/O data not available: profiling uses psutil mode which only provides aggregate metrics"
+
+This happens when:
+
+- Running on Windows (psutil mode by default)
+- Fallback to psutil mode on Linux (when strace is not available)
+- Fallback to system-wide measurement on macOS (when fs_usage fails)
+
+**Use Cases for Detailed Data:**
+
+1. **Identifying hot files**: Find which files are accessed most frequently
+2. **Analyzing I/O patterns**: See the distribution of read vs write operations per file
+3. **Debugging performance issues**: Identify unexpected I/O to specific files
+4. **Optimizing buffer sizes**: Examine the ``size_bytes`` distribution to tune your I/O strategy
+
 Histogram Visualization
 ~~~~~~~~~~~~~~~~~~~~~~~
 
diff --git a/src/iops_profiler/collector.py b/src/iops_profiler/collector.py
index 1715760..91edd40 100644
--- a/src/iops_profiler/collector.py
+++ b/src/iops_profiler/collector.py
@@ -121,10 +121,14 @@ def parse_fs_usage_line(self, line, collect_ops=False, collect_detailed=False):
 
         This is a convenience wrapper that uses the instance's compiled byte pattern.
         """
-        return self.parse_fs_usage_line_static(line, self._fs_usage_byte_pattern, collect_ops, collect_detailed)
+        return self.parse_fs_usage_line_static(
+            line, self._fs_usage_byte_pattern, collect_ops, collect_detailed
+        )
 
     @staticmethod
-    def parse_strace_line_static(line, strace_pattern, io_syscalls, collect_ops=False, collect_detailed=False):
+    def parse_strace_line_static(
+        line, strace_pattern, io_syscalls, collect_ops=False, collect_detailed=False
+    ):
         """Parse a single strace output line for I/O operations (static version)
 
         Example strace lines:
diff --git a/src/iops_profiler/magic.py b/src/iops_profiler/magic.py
index 333a92d..eaad660 100644
--- a/src/iops_profiler/magic.py
+++ b/src/iops_profiler/magic.py
@@ -160,9 +160,11 @@ def iops(self, line, cell=None):
                     self.shell.user_ns["iops_detailed_data"] = results["detailed_data"]
             else:
                 # No detailed data available (psutil mode or fallback)
-                self.shell.user_ns[
-                    "iops_detailed_data"
-                ] = "Detailed I/O data not available: profiling uses psutil mode which only provides aggregate metrics"
+                message = (
+                    "Detailed I/O data not available: profiling uses psutil mode "
+                    "which only provides aggregate metrics"
+                )
+                self.shell.user_ns["iops_detailed_data"] = message
 
         except Exception as e:
             print(f"❌ Error during IOPS profiling: {e}")
diff --git a/tests/test_detailed_data.py b/tests/test_detailed_data.py
new file mode 100644
index 0000000..93ad9ef
--- /dev/null
+++ b/tests/test_detailed_data.py
@@ -0,0 +1,223 @@
+"""
+Tests for detailed I/O data collection and injection into user namespace.
+
+This module tests the new detailed data collection feature that provides
+file paths, syscalls, and operation details via the iops_detailed_data variable.
+"""
+
+from unittest.mock import MagicMock
+
+import pytest
+
+from iops_profiler import collector
+from iops_profiler.magic import IOPSProfiler
+
+
+def create_test_profiler():
+    """Helper function to create a test profiler instance"""
+    mock_shell = MagicMock()
+    mock_shell.configurables = []
+    mock_shell.user_ns = {}  # Mock user namespace
+    profiler = IOPSProfiler.__new__(IOPSProfiler)
+    profiler.shell = mock_shell
+    # Initialize the profiler attributes manually to avoid traitlets
+    import sys
+
+    profiler.platform = sys.platform
+    # Initialize the collector with the mock shell
+    from iops_profiler.collector import Collector
+
+    profiler.collector = Collector(mock_shell)
+    return profiler
+
+
+class TestDetailedDataParsing:
+    """Test cases for detailed data parsing"""
+
+    @pytest.fixture
+    def profiler(self):
+        """Create an IOPSProfiler instance with a mock shell"""
+        return create_test_profiler()
+
+    def test_strace_detailed_with_path(self, profiler):
+        """Test parsing strace line with -y flag (file path included)"""
+        line = '3385  read(3</tmp/test.txt>, "data", 4096) = 133'
+        result = profiler.collector.parse_strace_line(line, collect_detailed=True)
+
+        assert result is not None
+        assert isinstance(result, dict)
+        assert result["path"] == "/tmp/test.txt"
+        assert result["operation"] == "read"
+        assert result["syscall"] == "read"
+        assert result["size_bytes"] == 133
+
+    def test_strace_detailed_without_path(self, profiler):
+        """Test parsing strace line without -y flag (no file path)"""
+        line = '3385  write(3, "Hello World...", 1100) = 1100'
+        result = profiler.collector.parse_strace_line(line, collect_detailed=True)
+
+        assert result is not None
+        assert isinstance(result, dict)
+        assert result["path"] == ""  # No path available
+        assert result["operation"] == "write"
+        assert result["syscall"] == "write"
+        assert result["size_bytes"] == 1100
+
+    def test_strace_detailed_pread64_with_path(self, profiler):
+        """Test parsing pread64 operation with path"""
+        line = '3385  pread64(3</var/log/test.log>, "...", 1024, 0) = 1024'
+        result = profiler.collector.parse_strace_line(line, collect_detailed=True)
+
+        assert result is not None
+        assert result["path"] == "/var/log/test.log"
+        assert result["operation"] == "read"
+        assert result["syscall"] == "pread64"
+        assert result["size_bytes"] == 1024
+
+    def test_strace_detailed_error_returns_none(self, profiler):
+        """Test parsing error operation returns None"""
+        line = "3385  read(3, 0x..., 4096) = -1 EBADF"
+        result = profiler.collector.parse_strace_line(line, collect_detailed=True)
+
+        assert result is None
+
+    def test_fs_usage_detailed_basic(self, profiler):
+        """Test parsing fs_usage line with detailed collection"""
+        line = "12:34:56  read  B=0x1000  /path/to/file.txt  Python"
+        result = profiler.collector.parse_fs_usage_line(line, collect_detailed=True)
+
+        assert result is not None
+        assert isinstance(result, dict)
+        assert result["path"] == "/path/to/file.txt"
+        assert result["operation"] == "read"
+        assert result["syscall"] == "read"
+        assert result["size_bytes"] == 0x1000
+
+    def test_fs_usage_detailed_write(self, profiler):
+        """Test parsing fs_usage write operation"""
+        line = "12:34:57  write  B=0x800  /tmp/output.dat  Python"
+        result = profiler.collector.parse_fs_usage_line(line, collect_detailed=True)
+
+        assert result is not None
+        assert result["path"] == "/tmp/output.dat"
+        assert result["operation"] == "write"
+        assert result["syscall"] == "write"
+        assert result["size_bytes"] == 0x800
+
+    def test_fs_usage_detailed_pread(self, profiler):
+        """Test parsing fs_usage pread operation"""
+        line = "12:34:58  pread  B=0x400  /data/file.bin  Python"
+        result = profiler.collector.parse_fs_usage_line(line, collect_detailed=True)
+
+        assert result is not None
+        assert result["path"] == "/data/file.bin"
+        assert result["operation"] == "read"
+        assert result["syscall"] == "pread"
+        assert result["size_bytes"] == 0x400
+
+    def test_fs_usage_detailed_non_io_returns_none(self, profiler):
+        """Test parsing non-I/O operation returns None"""
+        line = "12:34:59  open  B=0x1000  /path/to/file  Python"
+        result = profiler.collector.parse_fs_usage_line(line, collect_detailed=True)
+
+        assert result is None
+
+
+class TestDetailedDataCollection:
+    """Test cases for detailed data collection in measurement methods"""
+
+    @pytest.fixture
+    def profiler(self):
+        """Create an IOPSProfiler instance with a mock shell"""
+        return create_test_profiler()
+
+    def test_detailed_data_keys_in_result(self, profiler):
+        """Test that detailed_data key is added when collect_detailed=True"""
+        # We can't fully test the measurement methods without actual I/O
+        # but we can test the parsing logic
+        strace_lines = [
+            '3385  read(3</tmp/test1.txt>, "data", 100) = 100',
+            '3385  write(4</tmp/test2.txt>, "info", 200) = 200',
+        ]
+
+        detailed_data = []
+        for line in strace_lines:
+            detail = profiler.collector.parse_strace_line(line, collect_detailed=True)
+            if detail:
+                detailed_data.append(detail)
+
+        assert len(detailed_data) == 2
+        assert detailed_data[0]["path"] == "/tmp/test1.txt"
+        assert detailed_data[0]["operation"] == "read"
+        assert detailed_data[0]["size_bytes"] == 100
+        assert detailed_data[1]["path"] == "/tmp/test2.txt"
+        assert detailed_data[1]["operation"] == "write"
+        assert detailed_data[1]["size_bytes"] == 200
+
+
+class TestDetailedDataBackwardCompatibility:
+    """Test backward compatibility of parsing functions"""
+
+    @pytest.fixture
+    def profiler(self):
+        """Create an IOPSProfiler instance with a mock shell"""
+        return create_test_profiler()
+
+    def test_strace_parsing_without_collect_detailed(self, profiler):
+        """Test that original parsing still works without collect_detailed"""
+        line = '3385  read(3</tmp/test.txt>, "data", 4096) = 133'
+
+        # Test with collect_ops=False (default)
+        op_type, bytes_transferred = profiler.collector.parse_strace_line(line)
+        assert op_type == "read"
+        assert bytes_transferred == 133
+
+        # Test with collect_ops=True
+        result = profiler.collector.parse_strace_line(line, collect_ops=True)
+        assert result["type"] == "read"
+        assert result["bytes"] == 133
+
+    def test_fs_usage_parsing_without_collect_detailed(self, profiler):
+        """Test that original fs_usage parsing still works"""
+        line = "12:34:56  read  B=0x1000  /path/to/file  Python"
+
+        # Test with default mode
+        op_type, bytes_transferred = profiler.collector.parse_fs_usage_line(line)
+        assert op_type == "read"
+        assert bytes_transferred == 0x1000
+
+        # Test with collect_ops=True
+        result = profiler.collector.parse_fs_usage_line(line, collect_ops=True)
+        assert result["type"] == "read"
+        assert result["bytes"] == 0x1000
+
+
+class TestModuleLevelFunctionsWithDetailed:
+    """Test module-level backward compatibility functions with detailed mode"""
+
+    def test_module_level_parse_strace_line_with_detailed(self):
+        """Test module-level parse_strace_line function with collect_detailed"""
+        import re
+
+        from iops_profiler.collector import STRACE_IO_SYSCALLS
+
+        strace_pattern = re.compile(r"^\s*(\d+)\s+(\w+)\([^)]+\)\s*=\s*(-?\d+)")
+        io_syscalls = set(STRACE_IO_SYSCALLS)
+
+        line = '3385  read(3</tmp/test.txt>, "data", 4096) = 133'
+        result = collector.parse_strace_line(line, strace_pattern, io_syscalls, collect_detailed=True)
+
+        assert result is not None
+        assert result["path"] == "/tmp/test.txt"
+        assert result["operation"] == "read"
+        assert result["size_bytes"] == 133
+
+    def test_module_level_parse_fs_usage_line_with_detailed(self):
+        """Test module-level parse_fs_usage_line function with collect_detailed"""
+        line = "12:34:56  read  B=0x1000  /path/to/file.txt  Python"
+        result = collector.parse_fs_usage_line(line, collect_detailed=True)
+
+        assert result is not None
+        assert result["path"] == "/path/to/file.txt"
+        assert result["operation"] == "read"
+        assert result["size_bytes"] == 0x1000

From fa9b92789013fea24cfb401c0cecc4eb6571f320 Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Thu, 29 Jan 2026 21:51:44 +0000
Subject: [PATCH 4/4] Fix mutual exclusivity bug and add documentation warnings

- Fixed bug where collect_ops and collect_detailed were mutually exclusive
- Both histogram and detailed data can now be collected simultaneously
- Added warning about iops_detailed_data variable name collision
- Improved code structure to handle both flags independently
- All 133 tests still pass

Co-authored-by: mtauraso <31012+mtauraso@users.noreply.github.com>
---
 README.md                      |  2 ++
 src/iops_profiler/collector.py | 46 ++++++++++++++++++++++------------
 2 files changed, 32 insertions(+), 16 deletions(-)

diff --git a/README.md b/README.md
index 9203232..3dd919f 100644
--- a/README.md
+++ b/README.md
@@ -106,6 +106,8 @@ In the next cell:
 iops_detailed_data  # Returns a pandas DataFrame or a message
 ```
 
+> **Note:** The `iops_detailed_data` variable is automatically created or overwritten in your namespace after each `%%iops` execution, so any existing variable with that name is replaced. Avoid using this name for your own variables.
+
 **When detailed data is available** (Linux with strace, macOS with fs_usage):
 - `iops_detailed_data` is a pandas DataFrame with columns:
   - `path` (str): File path accessed
diff --git a/src/iops_profiler/collector.py b/src/iops_profiler/collector.py
index 91edd40..4f62433 100644
--- a/src/iops_profiler/collector.py
+++ b/src/iops_profiler/collector.py
@@ -335,6 +335,7 @@ def measure_macos_osascript(self, code, collect_ops=False, collect_detailed=Fals
             if os.path.exists(output_file):
                 with open(output_file, "r") as f:
                     for line in f:
+                        # Collect detailed data if requested
                         if collect_detailed:
                             detail = self.parse_fs_usage_line(line, collect_detailed=True)
                             if detail:
@@ -346,17 +347,23 @@ def measure_macos_osascript(self, code, collect_ops=False, collect_detailed=Fals
                                 elif detail["operation"] == "write":
                                     write_count += 1
                                     write_bytes += detail["size_bytes"]
-                        elif collect_ops:
+
+                        # Collect histogram operations if requested (can happen alongside detailed)
+                        if collect_ops:
                             op = self.parse_fs_usage_line(line, collect_ops=True)
                             if op:
                                 operations.append(op)
-                                if op["type"] == "read":
-                                    read_count += 1
-                                    read_bytes += op["bytes"]
-                                elif op["type"] == "write":
-                                    write_count += 1
-                                    write_bytes += op["bytes"]
-                        else:
+                                # Only update counts if we're not already tracking via detailed
+                                if not collect_detailed:
+                                    if op["type"] == "read":
+                                        read_count += 1
+                                        read_bytes += op["bytes"]
+                                    elif op["type"] == "write":
+                                        write_count += 1
+                                        write_bytes += op["bytes"]
+
+                        # Fallback: neither detailed nor ops collection
+                        if not collect_detailed and not collect_ops:
                             op_type, bytes_transferred = self.parse_fs_usage_line(line)
                             if op_type == "read":
                                 read_count += 1
@@ -489,6 +496,7 @@ def measure_linux_strace(self, code, collect_ops=False, collect_detailed=False):
                 try:
                     with open(output_file, "r", errors="ignore") as f:
                         for line in f:
+                            # Collect detailed data if requested
                             if collect_detailed:
                                 detail = self.parse_strace_line(line, collect_detailed=True)
                                 if detail:
@@ -500,17 +508,23 @@ def measure_linux_strace(self, code, collect_ops=False, collect_detailed=False):
                                     elif detail["operation"] == "write":
                                         write_count += 1
                                         write_bytes += detail["size_bytes"]
-                            elif collect_ops:
+
+                            # Collect histogram operations if requested (can happen alongside detailed)
+                            if collect_ops:
                                 op = self.parse_strace_line(line, collect_ops=True)
                                 if op:
                                     operations.append(op)
-                                    if op["type"] == "read":
-                                        read_count += 1
-                                        read_bytes += op["bytes"]
-                                    elif op["type"] == "write":
-                                        write_count += 1
-                                        write_bytes += op["bytes"]
-                            else:
+                                    # Only update counts if we're not already tracking via detailed
+                                    if not collect_detailed:
+                                        if op["type"] == "read":
+                                            read_count += 1
+                                            read_bytes += op["bytes"]
+                                        elif op["type"] == "write":
+                                            write_count += 1
+                                            write_bytes += op["bytes"]
+
+                            # Fallback: neither detailed nor ops collection
+                            if not collect_detailed and not collect_ops:
                                 op_type, bytes_transferred = self.parse_strace_line(line)
                                 if op_type == "read":
                                     read_count += 1