From 77c17daa3d11d22c5095ac1fcd5b9e0847562a1f Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Wed, 5 Nov 2025 03:39:06 +0000 Subject: [PATCH 1/5] Initial plan From f185a44044d6053cb325c9aab9c9b7a2dfaca376 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Wed, 5 Nov 2025 03:48:31 +0000 Subject: [PATCH 2/5] Implement Windows granular I/O tracking using Python-level interception Co-authored-by: mtauraso <31012+mtauraso@users.noreply.github.com> --- README.md | 10 +- src/iops_profiler/iops_profiler.py | 179 ++++++++++++++++++++++++++++- 2 files changed, 181 insertions(+), 8 deletions(-) diff --git a/README.md b/README.md index 213d498..ab92122 100644 --- a/README.md +++ b/README.md @@ -44,7 +44,7 @@ The extension will display a table showing: ### Histogram Visualization -Use the `--histogram` flag to visualize I/O operation distributions (available for `strace` and `fs_usage` measurement modes): +Use the `--histogram` flag to visualize I/O operation distributions: ```python %%iops --histogram @@ -61,8 +61,14 @@ Both charts display separate lines for reads, writes, and all operations combine ## Platform Support -- **Linux/Windows**: Uses `psutil` for per-process I/O tracking +- **Linux**: Uses `strace` for detailed per-operation tracking (fallback to `psutil` if `strace` unavailable) + - With `strace`: Captures all system-level I/O operations + - With `psutil`: Provides aggregate counts only (no histogram support) - **macOS**: Uses `fs_usage` with privilege elevation (requires password prompt) + - Captures all system-level I/O operations +- **Windows**: Uses Python-level I/O tracking for granular data + - Captures Python `open()`/`read()`/`write()` operations + - **Note**: May not capture I/O from native C extensions or libraries ## Requirements diff --git a/src/iops_profiler/iops_profiler.py b/src/iops_profiler/iops_profiler.py index d5a6cf9..8bc70e5 100644 --- a/src/iops_profiler/iops_profiler.py +++ b/src/iops_profiler/iops_profiler.py @@ -59,8 +59,13 @@ def __init__(self, shell): # Set of syscall names for I/O operations (lowercase) self._io_syscalls = set(STRACE_IO_SYSCALLS) - def _measure_linux_windows(self, code): - """Measure IOPS on Linux/Windows using psutil""" + def _measure_linux_windows(self, code, collect_ops=False): + """Measure IOPS on Linux/Windows using psutil + + Args: + code: The code to profile + collect_ops: If True, collect individual operation sizes for histogram (Windows only) + """ if not psutil: raise RuntimeError("psutil not installed. Run: pip install psutil") @@ -72,6 +77,10 @@ def _measure_linux_windows(self, code): except AttributeError: raise RuntimeError(f"psutil.Process.io_counters() not supported on {self.platform}") + # On Windows, use Python-level I/O tracking if granular data is requested + if collect_ops and self.platform == 'win32': + return self._measure_windows_python_io(code, io_before) + # Execute the code start_time = time.time() self.shell.run_cell(code) @@ -95,6 +104,166 @@ def _measure_linux_windows(self, code): 'method': 'psutil (per-process)' } + def _measure_windows_python_io(self, code, io_before): + """Measure IOPS on Windows with Python-level I/O tracking for granular data + + This method intercepts Python's built-in open() function to track individual + read and write operations at the Python level. + + Note: Only captures Python-level I/O (open/read/write), not native C extensions. + + Args: + code: The code to profile + io_before: Initial psutil I/O counters for verification + """ + operations = [] + + # Import builtins to patch open() + import builtins + + original_open = builtins.open + + class IOTracker: + """Wrapper to track I/O operations on file objects""" + def __init__(self, file_obj, mode): + self.file = file_obj + self.mode = mode + + def read(self, size=-1): + result = self.file.read(size) + if result: + bytes_read = len(result) if isinstance(result, (bytes, str)) else 0 + if bytes_read > 0: + operations.append({'type': 'read', 'bytes': bytes_read}) + return result + + def readline(self, size=-1): + result = self.file.readline(size) + if result: + bytes_read = len(result) if isinstance(result, (bytes, str)) else 0 + if bytes_read > 0: + operations.append({'type': 'read', 'bytes': bytes_read}) + return result + + def readlines(self, hint=-1): + result = self.file.readlines(hint) + if result: + total_bytes = sum(len(line) for line in result) + if total_bytes > 0: + # Record as individual operations to better reflect actual I/O + for line in result: + if line: + operations.append({'type': 'read', 'bytes': len(line)}) + return result + + def write(self, data): + result = self.file.write(data) + bytes_written = len(data) if isinstance(data, (bytes, str)) else 0 + if bytes_written > 0: + operations.append({'type': 'write', 'bytes': bytes_written}) + return result + + def writelines(self, lines): + result = self.file.writelines(lines) + # Record as individual operations + for line in lines: + if line: + bytes_written = len(line) if isinstance(line, (bytes, str)) else 0 + if bytes_written > 0: + operations.append({'type': 'write', 'bytes': bytes_written}) + return result + + def __enter__(self): + return self + + def __exit__(self, *args): + return self.file.__exit__(*args) + + def __iter__(self): + return self + + def __next__(self): + line = self.file.__next__() + if line: + bytes_read = len(line) if isinstance(line, (bytes, str)) else 0 + if bytes_read > 0: + operations.append({'type': 'read', 'bytes': bytes_read}) + return line + + def __getattr__(self, name): + return getattr(self.file, name) + + def tracked_open(file, mode='r', *args, **kwargs): + """Wrapper for open() that tracks I/O operations""" + file_obj = original_open(file, mode, *args, **kwargs) + # Only track readable/writable files + if 'r' in mode or 'w' in mode or 'a' in mode or '+' in mode: + return IOTracker(file_obj, mode) + return file_obj + + try: + # Install the tracking wrapper in builtins + builtins.open = tracked_open + + # Also inject into the IPython namespace to ensure it's used + shell_open = self.shell.user_ns.get('open', original_open) + self.shell.user_ns['open'] = tracked_open + + # Execute the code + start_time = time.time() + self.shell.run_cell(code) + elapsed_time = time.time() - start_time + + finally: + # Restore original open in both places + builtins.open = original_open + if 'open' in self.shell.user_ns: + # Restore the original value or remove if it was injected + if shell_open != tracked_open: + self.shell.user_ns['open'] = shell_open + else: + self.shell.user_ns.pop('open', None) + + # Get final I/O counters for aggregate counts + process = psutil.Process() + io_after = process.io_counters() + + # Calculate aggregate differences from psutil + read_count = io_after.read_count - io_before.read_count + write_count = io_after.write_count - io_before.write_count + read_bytes = io_after.read_bytes - io_before.read_bytes + write_bytes = io_after.write_bytes - io_before.write_bytes + + # If we captured operations, use them; otherwise fall back to psutil counts + if operations: + # Count operations from our tracker + tracked_read_count = sum(1 for op in operations if op['type'] == 'read') + tracked_write_count = sum(1 for op in operations if op['type'] == 'write') + tracked_read_bytes = sum(op['bytes'] for op in operations if op['type'] == 'read') + tracked_write_bytes = sum(op['bytes'] for op in operations if op['type'] == 'write') + + # Prefer tracked counts if available, fall back to psutil + result_read_count = tracked_read_count if tracked_read_count > 0 else read_count + result_write_count = tracked_write_count if tracked_write_count > 0 else write_count + result_read_bytes = tracked_read_bytes if tracked_read_bytes > 0 else read_bytes + result_write_bytes = tracked_write_bytes if tracked_write_bytes > 0 else write_bytes + else: + # No operations tracked, use psutil counts + result_read_count = read_count + result_write_count = write_count + result_read_bytes = read_bytes + result_write_bytes = write_bytes + + return { + 'read_count': result_read_count, + 'write_count': result_write_count, + 'read_bytes': result_read_bytes, + 'write_bytes': result_write_bytes, + 'elapsed_time': elapsed_time, + 'method': 'Python I/O tracking (per-operation)', + 'operations': operations + } + def _parse_fs_usage_line(self, line, collect_ops=False): """Parse a single fs_usage output line for I/O operations @@ -764,14 +933,12 @@ def iops(self, line, cell): except (RuntimeError, FileNotFoundError) as e: print(f"⚠️ Could not use strace: {e}") print("Falling back to psutil per-process measurement.\n") - results = self._measure_linux_windows(cell) + results = self._measure_linux_windows(cell, collect_ops=False) if show_histogram: print("⚠️ Histograms not available for psutil measurement mode.") elif self.platform == 'win32': - results = self._measure_linux_windows(cell) - if show_histogram: - print("⚠️ Histograms not available for psutil measurement mode on Windows.") + results = self._measure_linux_windows(cell, collect_ops=collect_ops) else: print(f"⚠️ Platform '{self.platform}' not fully supported.") From 0817238b0cd9b388d15367a81c57876df126f428 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Wed, 5 Nov 2025 03:56:42 +0000 Subject: [PATCH 3/5] Clean up unused imports and simplify code Co-authored-by: mtauraso <31012+mtauraso@users.noreply.github.com> --- src/iops_profiler/iops_profiler.py | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/src/iops_profiler/iops_profiler.py b/src/iops_profiler/iops_profiler.py index 8bc70e5..29e9497 100644 --- a/src/iops_profiler/iops_profiler.py +++ b/src/iops_profiler/iops_profiler.py @@ -15,10 +15,8 @@ import re import subprocess import tempfile -from pathlib import Path from IPython.core.magic import Magics, magics_class, cell_magic from IPython.display import display, HTML -import math try: import psutil @@ -332,11 +330,9 @@ def _parse_strace_line(self, line, collect_ops=False): # (e.g., read, pread64, readv, write, pwrite64, writev) # Note: No standard syscalls contain both 'read' and 'write' in their names if 'read' in syscall: - is_read = True - is_write = False + op_type = 'read' elif 'write' in syscall: - is_read = False - is_write = True + op_type = 'write' else: return None if collect_ops else (None, 0) @@ -345,8 +341,6 @@ def _parse_strace_line(self, line, collect_ops=False): if bytes_transferred < 0: return None if collect_ops else (None, 0) - op_type = 'read' if is_read else 'write' - if collect_ops: return {'type': op_type, 'bytes': bytes_transferred} return op_type, bytes_transferred From 5d8bd7610a778bc64c8d9899836b6ec88136c324 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Wed, 5 Nov 2025 04:01:33 +0000 Subject: [PATCH 4/5] Address code review feedback: improve IOTracker and namespace handling Co-authored-by: mtauraso <31012+mtauraso@users.noreply.github.com> --- src/iops_profiler/iops_profiler.py | 38 ++++++++++++++++++++++++------ 1 file changed, 31 insertions(+), 7 deletions(-) diff --git a/src/iops_profiler/iops_profiler.py b/src/iops_profiler/iops_profiler.py index 29e9497..a93e069 100644 --- a/src/iops_profiler/iops_profiler.py +++ b/src/iops_profiler/iops_profiler.py @@ -172,6 +172,9 @@ def writelines(self, lines): return result def __enter__(self): + # Return self to allow our wrapper methods to be used + # The wrapped file's __enter__ is called automatically on the underlying file + self.file.__enter__() return self def __exit__(self, *args): @@ -181,14 +184,33 @@ def __iter__(self): return self def __next__(self): + # Let StopIteration propagate naturally line = self.file.__next__() + # Track the operation only if successful if line: bytes_read = len(line) if isinstance(line, (bytes, str)) else 0 if bytes_read > 0: operations.append({'type': 'read', 'bytes': bytes_read}) return line + def close(self): + """Close the file""" + return self.file.close() + + def flush(self): + """Flush the file buffer""" + return self.file.flush() + + def seek(self, offset, whence=0): + """Seek to a position in the file""" + return self.file.seek(offset, whence) + + def tell(self): + """Return current file position""" + return self.file.tell() + def __getattr__(self, name): + """Fallback for any other file methods""" return getattr(self.file, name) def tracked_open(file, mode='r', *args, **kwargs): @@ -204,7 +226,9 @@ def tracked_open(file, mode='r', *args, **kwargs): builtins.open = tracked_open # Also inject into the IPython namespace to ensure it's used - shell_open = self.shell.user_ns.get('open', original_open) + # Save whether 'open' was already in the namespace + had_open_in_ns = 'open' in self.shell.user_ns + shell_open = self.shell.user_ns.get('open') if had_open_in_ns else None self.shell.user_ns['open'] = tracked_open # Execute the code @@ -215,12 +239,12 @@ def tracked_open(file, mode='r', *args, **kwargs): finally: # Restore original open in both places builtins.open = original_open - if 'open' in self.shell.user_ns: - # Restore the original value or remove if it was injected - if shell_open != tracked_open: - self.shell.user_ns['open'] = shell_open - else: - self.shell.user_ns.pop('open', None) + # Restore the IPython namespace + if had_open_in_ns and shell_open is not None: + self.shell.user_ns['open'] = shell_open + else: + # Remove 'open' from namespace if it wasn't there before + self.shell.user_ns.pop('open', None) # Get final I/O counters for aggregate counts process = psutil.Process() From b5befd117d0bda5058247fc3048454b84307319b Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Wed, 5 Nov 2025 04:06:05 +0000 Subject: [PATCH 5/5] Move builtins import to top and clarify __next__ tracking behavior Co-authored-by: mtauraso <31012+mtauraso@users.noreply.github.com> --- src/iops_profiler/iops_profiler.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/src/iops_profiler/iops_profiler.py b/src/iops_profiler/iops_profiler.py index a93e069..3005eef 100644 --- a/src/iops_profiler/iops_profiler.py +++ b/src/iops_profiler/iops_profiler.py @@ -9,6 +9,7 @@ f.write('Hello World') """ +import builtins import os import sys import time @@ -116,9 +117,6 @@ def _measure_windows_python_io(self, code, io_before): """ operations = [] - # Import builtins to patch open() - import builtins - original_open = builtins.open class IOTracker: @@ -184,9 +182,10 @@ def __iter__(self): return self def __next__(self): - # Let StopIteration propagate naturally + # Let StopIteration propagate naturally to signal iteration end line = self.file.__next__() - # Track the operation only if successful + # Track the operation only if it transferred data (non-zero bytes) + # This is consistent with strace/fs_usage which report actual bytes transferred if line: bytes_read = len(line) if isinstance(line, (bytes, str)) else 0 if bytes_read > 0: