Fix line invalidation speed #891

Merged
merged 21 commits into from
Dec 3, 2024
Changes from 4 commits
3 changes: 3 additions & 0 deletions .github/workflows/test-smoketests.yml
@@ -51,6 +51,9 @@ jobs:
- name: decorator smoke test
run: python test/smoketest_profile_decorator.py

- name: line invalidation test
run: python test/smoketest_line_invalidation.py

# Note: This test doesn't need to read an output,
# it is meant to determine if there is an ImportError
# or anything related if relative imports are used.
5 changes: 4 additions & 1 deletion scalene/scalene_profiler.py
@@ -546,7 +546,7 @@ def malloc_signal_handler(
ByteCodeIndex(f.f_lasti),
]
Scalene.__alloc_sigq.put([0])
pywhere.enable_settrace()
pywhere.enable_settrace(this_frame)
del this_frame

@staticmethod
@@ -1289,6 +1289,9 @@ def alloc_sigqueue_processor(x: Optional[List[int]]) -> None:
bytei,
) = item
is_malloc = action == Scalene.MALLOC_ACTION
if count == scalene.scalene_config.NEWLINE_TRIGGER_LENGTH + 1:
continue # in previous implementations, we were adding NEWLINE to the footprint.
# We should not account for this in the user-facing profile.
count /= Scalene.BYTES_PER_MB
if is_malloc:
stats.current_footprint += count
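For context on the new skip in alloc_sigqueue_processor: pywhere's allocate_newline() (further down in this diff) makes a sentinel allocation that sampleheap.hpp recognizes as the special NEWLINE value, and in previous implementations that bookkeeping allocation was counted toward the footprint. A minimal sketch of the filter, assuming NEWLINE_TRIGGER_LENGTH from scalene.scalene_config as referenced in the diff (the helper name is mine, not part of the PR):

import scalene.scalene_config

def is_newline_sentinel(count: int) -> bool:
    # The sentinel allocation shows up with a byte count of exactly
    # NEWLINE_TRIGGER_LENGTH + 1; it is profiler bookkeeping, not user
    # memory, so it should be excluded from the user-facing footprint.
    return count == scalene.scalene_config.NEWLINE_TRIGGER_LENGTH + 1
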
10 changes: 4 additions & 6 deletions src/include/sampleheap.hpp
@@ -150,12 +150,10 @@
// If this is the special NEWLINE value, trigger an update.
if (unlikely(realSize == NEWLINE)) {
std::string filename;
int lineno;
int bytei;
decltype(whereInPython)* where = p_whereInPython;
if (where != nullptr && where(filename, lineno, bytei)) {
writeCount(MallocSignal, realSize, ptr, filename, lineno, bytei);
}
// int lineno;
// int bytei;
// decltype(whereInPython)* where = p_whereInPython;
writeCount(MallocSignal, realSize, ptr, filename, -1, -1);
mallocTriggered()++;
return;
}
20 changes: 18 additions & 2 deletions src/source/pywhere.cpp
@@ -268,6 +268,16 @@ static void allocate_newline() {

static int trace_func(PyObject* obj, PyFrameObject* frame, int what,
PyObject* arg) {
if (what == PyTrace_CALL || what == PyTrace_C_CALL) {
PyThreadState* tstate = PyThreadState_Get();
frame->f_trace_lines = 0;
frame->f_trace = NULL;
#if PY_VERSION_HEX >= 0x030a0000 && PY_VERSION_HEX < 0x030c0000
// This pre-3.12 optimization only exists post 3.9
tstate->cframe->use_tracing = 0;
#endif

}
if (what != PyTrace_LINE) {
return 0;
}
@@ -297,7 +307,6 @@ static int trace_func(PyObject* obj, PyFrameObject* frame, int what,
// Needed because decref will be called in on_stack
Py_INCREF(frame);
if (on_stack(last_fname_s, lineno_l, static_cast<PyFrameObject*>(frame))) {
frame->f_trace_lines = 0;
return 0;
}

@@ -370,7 +379,14 @@ static PyObject* depopulate_struct(PyObject* self, PyObject* args) {
}

static PyObject* enable_settrace(PyObject* self, PyObject* args) {
PyObject* frame;
if (!PyArg_ParseTuple(args, "O", &frame)) {
return NULL;
}
PyFrameObject* frame_obj = (PyFrameObject*) frame;
PyEval_SetTrace(trace_func, NULL);
// frame_obj->f_trace = &trace_func;
frame_obj->f_trace_lines = 1;
Py_RETURN_NONE;
}

@@ -389,7 +405,7 @@ static PyMethodDef EmbMethods[] = {
{"print_files_to_profile", print_files_to_profile, METH_NOARGS,
"printing for debug"},
// {"return_buffer", return_buffer, METH_NOARGS, ""},
{"enable_settrace", enable_settrace, METH_NOARGS, ""},
{"enable_settrace", enable_settrace, METH_VARARGS, ""},
{"disable_settrace", disable_settrace, METH_NOARGS, ""},
{"populate_struct", populate_struct, METH_NOARGS, ""},
{"depopulate_struct", depopulate_struct, METH_NOARGS, ""},
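A note on the enable_settrace change: switching the method from METH_NOARGS to METH_VARARGS is what allows the Python side to hand over a frame, and the malloc signal handler above now calls it as pywhere.enable_settrace(this_frame). A minimal sketch of the intent in pure Python, with sys.settrace and frame.f_trace_lines as stand-ins for the C API calls in the diff (an illustration, not the real mechanism):

import sys

def enable_settrace_sketch(frame, trace_func):
    # Install the trace function (the C code uses PyEval_SetTrace, which
    # registers a C-level callback rather than a Python one).
    sys.settrace(trace_func)
    # Request line events only for the frame that was active when the
    # malloc signal arrived (frame_obj->f_trace_lines = 1 in the diff).
    frame.f_trace_lines = True

The new PyTrace_CALL / PyTrace_C_CALL branch in trace_func then turns line tracing back off for frames entered afterwards (and, on 3.10-3.11, clears cframe->use_tracing), which keeps the cost of the trace hook confined to the frame of interest.
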
14 changes: 14 additions & 0 deletions test/line_attribution_tests/line_after_final_alloc.py
@@ -0,0 +1,14 @@

def main():
accum = bytes()
for i in range(31):
accum += bytes(10485767 * 2)

print(len(accum))

asdf = bytes(2 * 10485767)
some_dead_line = None


if __name__ == '__main__':
main()
13 changes: 13 additions & 0 deletions test/line_attribution_tests/loop_below_threshold.py
@@ -0,0 +1,13 @@

def main():
accum = bytes()
for i in range(31):
accum += bytes(10485767 // 4) # far below the allocation sampling window

print(len(accum))

asdf = bytes(2 * 10485767)


if __name__ == '__main__':
main()
13 changes: 13 additions & 0 deletions test/line_attribution_tests/loop_with_multiple_lines.py
@@ -0,0 +1,13 @@

def main():
accum = bytes()
for i in range(31):
accum += bytes(2 * 10485767) # 2x the allocation sampling window
bogus = None
print(len(accum))

asdf = bytes(2 * 10485767)


if __name__ == '__main__':
main()
13 changes: 13 additions & 0 deletions test/line_attribution_tests/loop_with_one_alloc.py
@@ -0,0 +1,13 @@

def main():
accum = bytes()
for i in range(31):
accum += bytes(2 * 10485767) # 2x the allocation sampling window

print(len(accum))

asdf = bytes(2 * 10485767)


if __name__ == '__main__':
main()
12 changes: 12 additions & 0 deletions test/line_attribution_tests/loop_with_two_allocs.py
@@ -0,0 +1,12 @@

def main():
accum = bytes()
for i in range(31):
accum += bytes(2 * 10485767) + bytes(2 * 10485767) # 2x the allocation sampling window

print(len(accum))

asdf = bytes(2 * 10485767)

if __name__ == '__main__':
main()
104 changes: 104 additions & 0 deletions test/smoketest_line_invalidation.py
@@ -0,0 +1,104 @@
"""
This is bound very closely to the current implementation of
the tests in `test/line_attribution_tests.

The two things that matter are the number of loops, the size
of the allocations, and the exact line numbers.


"""

expected_md5_sums = {
"line_attribution_tests/loop_below_threshold.py": "7664a7dcc0f4ab94a44e431448b5f348",
"line_attribution_tests/loop_with_one_alloc.py": "da9ff0aa223123c956049e940c3ef093",
"line_attribution_tests/loop_with_multiple_lines.py": "48ce0e8693fe43b1ebb7eb75a0fd5832",
"line_attribution_tests/loop_with_two_allocs.py": "71f337140aa25383525e56a6167cabf8",
"line_attribution_tests/line_after_final_alloc.py": "ca8cdd44ea6e4a9c286c05facae6a721"
}

import subprocess
import tempfile
import sys
from pathlib import Path
from hashlib import md5
from scalene.scalene_json import ScaleneJSONSchema

N_LOOPS = 31
LOOP_ALLOC_LINENO = 5  # the in-loop allocation sits on line 5 of each test file
OUT_OF_LOOP_ALLOC_LINENO = 9  # the single allocation after the loop sits on line 9

def check_for_changes():
errors = []
for fname, expected_sum in expected_md5_sums.items():
with open(fname, 'rb') as f:
digest = md5(f.read()).hexdigest()
if digest != expected_sum:
errors.append(fname)
    assert len(errors) == 0, f"Detected change in file(s) {', '.join(errors)}"

def get_line(scalene_profile: ScaleneJSONSchema, lineno: int):
files = list(scalene_profile.files.keys())
assert len(files) == 1
filename = files[0]
    return scalene_profile.files[filename].lines[lineno - 1]


def get_profile(test_stem, outdir_p, test_dir):
proc = subprocess.run(
[
sys.executable,
"-m",
"scalene",
"--cli",
"--json",
"--outfile",
outdir_p / f"{test_stem}.json",
test_dir / f"{test_stem}.py",
],
capture_output=True,
check=True,
)
with open(outdir_p / f"{test_stem}.json", "r") as f:
profile = ScaleneJSONSchema.model_validate_json(f.read())
return (test_stem, profile)


def main():
test_dir = Path(__file__).parent / "line_attribution_tests"
with tempfile.TemporaryDirectory() as outdir:
outdir_p = Path(outdir)
one_alloc = get_profile("loop_with_one_alloc", outdir_p, test_dir)
two_on_one_line = get_profile("loop_with_two_allocs", outdir_p, test_dir)
below_threshold = get_profile("loop_below_threshold", outdir_p, test_dir)
line_after_final_alloc = get_profile(
"line_after_final_alloc", outdir_p, test_dir
)
errors = []
for stem, profile in [one_alloc, two_on_one_line, line_after_final_alloc]:
line = get_line(profile, LOOP_ALLOC_LINENO)
            if line.n_mallocs != N_LOOPS:
                errors.append(f"Expected {N_LOOPS} mallocs on line {LOOP_ALLOC_LINENO} of {stem}, got {line.n_mallocs}")

bt_stem, bt_prof = below_threshold
bt_line = get_line(bt_prof, LOOP_ALLOC_LINENO)
        if bt_line.n_mallocs >= N_LOOPS:
errors.append(f"{bt_stem} makes smaller allocations than the allocation sampling window, so fewer than {N_LOOPS} allocations on {LOOP_ALLOC_LINENO} should be reported. Got {bt_line.n_mallocs}")

for stem, profile in [one_alloc, two_on_one_line, below_threshold, line_after_final_alloc]:
line = get_line(profile, OUT_OF_LOOP_ALLOC_LINENO)
            if line.n_mallocs != 1:
                errors.append(f'Line {OUT_OF_LOOP_ALLOC_LINENO} in {stem} makes a single large allocation, so exactly one malloc should be reported there; got {line.n_mallocs}.')

if len(errors) > 0:
for error in errors:
print(f'ERROR: {error}')
exit(1)
else:
print("PASS")
exit(0)

if __name__ == '__main__':
main()
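For reference, the expectations encoded above come down to simple arithmetic over the test files (a sketch; the 10485767-byte figure is the allocation sampling window implied by the test-file comments, not a value read from Scalene's config):

WINDOW = 10485767                  # allocation sampling window assumed by the test files

# loop_with_one_alloc / loop_with_two_allocs / line_after_final_alloc:
# each of the 31 iterations allocates at least 2 * WINDOW bytes (~20 MB) on
# line 5, so every iteration should cross the window and register a malloc there.
per_iteration_large = 2 * WINDOW

# loop_below_threshold: each iteration allocates WINDOW // 4 bytes (~2.5 MB),
# so roughly four iterations are needed to cross the window once, which is why
# far fewer than 31 mallocs should be attributed to line 5.
per_iteration_small = WINDOW // 4

# Every test file also allocates 2 * WINDOW bytes once on line 9, outside the
# loop, which should show up as exactly one reported malloc.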