Skip to content

python support #95

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 15 commits into from
Jul 12, 2023
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion .github/workflows/build.yml
Original file line number Diff line number Diff line change
@@ -63,5 +63,8 @@ jobs:
go-version-file: .go-version
check-latest: true

- name: Setup python
run: make python

- name: Test
run: go test -v ./...
run: go test ./...
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -21,3 +21,4 @@
testdata/rust/*/target/*
testdata/.sysroot/*
*.pb.gz
.python
16 changes: 14 additions & 2 deletions Makefile
Original file line number Diff line number Diff line change
@@ -18,15 +18,17 @@ testdata.files = \
$(testdata.tinygo.wasm) \
$(testdata.wat.wasm)

python.files = .python/python.wasm .python/python311.zip

all: test

clean:
rm -f $(testdata.files)
rm -f $(testdata.files) $(python.files)

test: testdata
go test ./...

testdata: wasi-libc $(testdata.files)
testdata: wasi-libc python $(testdata.files)

testdata/.sysroot:
mkdir -p testdata/.sysroot
@@ -54,6 +56,16 @@ testdata/wat/%.wasm: testdata/wat/%.wat

wasi-libc: testdata/.sysroot/lib/wasm32-wasi/libc.a

python: $(python.files)

.python/python.wasm:
mkdir -p $(dir $@)
curl -fsSL https://timecraft.s3.amazonaws.com/python-vanilla/main/python.wasm -o $@

.python/python311.zip:
mkdir -p $(dir $@)
curl -fsSL https://timecraft.s3.amazonaws.com/python-vanilla/main/python311.zip -o $@

.gitmodules:
git submodule add --name wasi-libc -- \
'https://github.com/WebAssembly/wasi-libc' testdata/.wasi-libc
32 changes: 32 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
@@ -186,6 +186,38 @@ The CPU time profiler measures the actual time spent on-CPU without taking into
account the off-CPU time (e.g waiting for I/O). For this profiler, all the
host-functions are considered off-CPU.

## Language support

wzprof runs some heuristics to assess what the guest module is running to adapt
the way it symbolizes and walks the stack. In all other cases, it defaults to
inspecting the wasm stack and uses DWARF information if present in the module.

### Golang

If the guest has been compiled by golang/go 1.21+, wzprof inspects the memory
to walk the Go stack, which provides full call stacks, instead of the shortened
versions you would get without this support.

In addition, wzprof parses pclntab to perform symbolization. This is the same
mechanism the Go runtime itself uses to display meaningful stack traces when a
panic occurs.

### Python 3.11

If the guest is CPython 3.11 and has been compiled with debug symbols (such as
[timecraft's][timecraft-python]), wzprof walks the Python interpreter call
stack, not the C stack it would otherwise report. This provides more meaningful
profiling information on the script being executed.

At the moment it does not support merging the C extension calls into the Python
interpreter stack.

Note that a current limitation of the implementation is that unloading or
reloading modules may result in an incorrect profile. If that's a problem for
you please file an issue in the github tracker.

[timecraft-python]: https://docs.timecraft.dev/getting-started/prep-application/compiling-python#preparing-python

## Contributing

Pull requests are welcome! Anything that is not a simple fix would probably
76 changes: 57 additions & 19 deletions cmd/wzprof/main_test.go
Original file line number Diff line number Diff line change
@@ -16,7 +16,8 @@ import (
// that.

func TestDataCSimple(t *testing.T) {
testMemoryProfiler(t, "../../testdata/c/simple.wasm", []sample{
p := program{filePath: "../../testdata/c/simple.wasm"}
testMemoryProfiler(t, p, []sample{
{
[]int64{1, 10},
[]frame{
@@ -53,7 +54,8 @@ func TestDataCSimple(t *testing.T) {
}

func TestDataRustSimple(t *testing.T) {
testMemoryProfiler(t, "../../testdata/rust/simple/target/wasm32-wasi/debug/simple.wasm", []sample{
p := program{filePath: "../../testdata/rust/simple/target/wasm32-wasi/debug/simple.wasm"}
testMemoryProfiler(t, p, []sample{
{
[]int64{1, 120},
[]frame{
@@ -89,10 +91,52 @@ func TestDataRustSimple(t *testing.T) {
})
}

func TestPyTwoCalls(t *testing.T) {
pyd := t.TempDir()
pyzip := filepath.Join(pyd, "/usr/local/lib/python311.zip")
pyscript := filepath.Join(pyd, "script.py")
os.MkdirAll(filepath.Dir(pyzip), os.ModePerm)
os.Link("../../.python/python311.zip", pyzip)
os.Link("../../testdata/python/simple.py", pyscript)

p := program{
filePath: "../../.python/python.wasm",
args: []string{"/script.py"},
mounts: []string{pyd + ":/"},
}

testCpuProfiler(t, p, []sample{
{ // deepest script.py call stack
[]int64{1},
[]frame{
{"script.a", 2, false},
{"script.b", 7, false},
{"script.c", 11, false},
{"script", 15, false},
},
},
})

testMemoryProfiler(t, p, []sample{
// byterray(100) allocates 28 bytes for the object, and 100+1 byte for
// the content because in python byte arrays are null-terminated. It
// first calls PyObject_Malloc(28), then PyObject_Realloc(101).
{
[]int64{2, 129},
[]frame{
{"script.a", 3, false},
{"script.b", 7, false},
{"script.c", 11, false},
{"script", 15, false},
},
},
})
}

func TestGoTwoCalls(t *testing.T) {
wasm := "../../testdata/go/twocalls.wasm"
p := program{filePath: "../../testdata/go/twocalls.wasm"}

testCpuProfiler(t, wasm, []sample{
testCpuProfiler(t, p, []sample{
{ // first call to myalloc1() from main.
[]int64{1},
[]frame{
@@ -129,7 +173,7 @@ func TestGoTwoCalls(t *testing.T) {
},
})

testMemoryProfiler(t, wasm, []sample{
testMemoryProfiler(t, p, []sample{
{ // first call to myalloc1() from main.
[]int64{1, 41},
[]frame{
@@ -167,35 +211,29 @@ func TestGoTwoCalls(t *testing.T) {
})
}

func testCpuProfiler(t *testing.T, path string, expectedSamples []sample) {
prog := &program{
filePath: path,
sampleRate: 1,
cpuProfile: filepath.Join(t.TempDir(), "cpu.pprof"),
}
func testCpuProfiler(t *testing.T, prog program, expectedSamples []sample) {
prog.sampleRate = 1
prog.cpuProfile = filepath.Join(t.TempDir(), "cpu.pprof")

expectedTypes := []string{
"samples",
"cpu",
}

p := execForProfile(t, prog, prog.cpuProfile)
p := execForProfile(t, &prog, prog.cpuProfile)
assertSamples(t, expectedTypes, expectedSamples, p)
}

func testMemoryProfiler(t *testing.T, path string, expectedSamples []sample) {
prog := &program{
filePath: path,
sampleRate: 1,
memProfile: filepath.Join(t.TempDir(), "mem.pprof"),
}
func testMemoryProfiler(t *testing.T, prog program, expectedSamples []sample) {
prog.sampleRate = 1
prog.memProfile = filepath.Join(t.TempDir(), "mem.pprof")

expectedTypes := []string{
"alloc_objects",
"alloc_space",
}

p := execForProfile(t, prog, prog.memProfile)
p := execForProfile(t, &prog, prog.memProfile)
assertSamples(t, expectedTypes, expectedSamples, p)
}

9 changes: 8 additions & 1 deletion cpu.go
Original file line number Diff line number Diff line change
@@ -188,7 +188,14 @@ func (p *CPUProfiler) NewHandler(sampleRate float64) http.Handler {
// NewFunctionListener returns a function listener suited to record CPU timings
// of calls to the function passed as argument.
func (p *CPUProfiler) NewFunctionListener(def api.FunctionDefinition) experimental.FunctionListener {
_, skip := p.p.filteredFunctions[def.Name()]
name := def.Name()
if len(p.p.onlyFunctions) > 0 {
_, keep := p.p.onlyFunctions[name]
if !keep {
return nil
}
}
_, skip := p.p.filteredFunctions[name]
if skip {
return nil
}
56 changes: 40 additions & 16 deletions dwarf.go
Original file line number Diff line number Diff line change
@@ -11,14 +11,13 @@ import (
"sync"

"github.com/tetratelabs/wazero"
"github.com/tetratelabs/wazero/api"
"github.com/tetratelabs/wazero/experimental"
)

// buildDwarfSymbolizer constructs a Symbolizer instance from the DWARF sections
// of the given WebAssembly module.
func buildDwarfSymbolizer(module wazero.CompiledModule) (symbolizer, error) {
return newDwarfmapper(module.CustomSections())
func buildDwarfSymbolizer(parser dwarfparser) symbolizer {
return newDwarfmapper(parser)
}

type sourceOffsetRange = [2]uint64
@@ -42,42 +41,67 @@ type dwarfmapper struct {
onceSourceOffsetNotFound sync.Once
}

func newDwarfmapper(sections []api.CustomSection) (*dwarfmapper, error) {
var info, line, ranges, str, abbrev []byte
const (
debugInfo = ".debug_info"
debugLine = ".debug_line"
debugStr = ".debug_str"
debugAbbrev = ".debug_abbrev"
debugRanges = ".debug_ranges"
)

func newDwarfparser(module wazero.CompiledModule) (dwarfparser, error) {
sections := module.CustomSections()

var info, line, ranges, str, abbrev []byte
for _, section := range sections {
log.Printf("dwarf: found section %s", section.Name())
switch section.Name() {
case ".debug_info":
case debugInfo:
info = section.Data()
case ".debug_line":
case debugLine:
line = section.Data()
case ".debug_str":
case debugStr:
str = section.Data()
case ".debug_abbrev":
case debugAbbrev:
abbrev = section.Data()
case ".debug_ranges":
case debugRanges:
ranges = section.Data()
}
}

d, err := dwarf.New(abbrev, nil, nil, info, line, nil, ranges, str)
if err != nil {
return nil, fmt.Errorf("dwarf: %w", err)
return dwarfparser{}, fmt.Errorf("dwarf: %w", err)
}

r := d.Reader()
return dwarfparser{d: d, r: r}, nil
}

func newDwarfParserFromBin(wasmbin []byte) (dwarfparser, error) {
info := wasmCustomSection(wasmbin, debugInfo)
line := wasmCustomSection(wasmbin, debugLine)
ranges := wasmCustomSection(wasmbin, debugRanges)
str := wasmCustomSection(wasmbin, debugStr)
abbrev := wasmCustomSection(wasmbin, debugAbbrev)

d, err := dwarf.New(abbrev, nil, nil, info, line, nil, ranges, str)
if err != nil {
return dwarfparser{}, fmt.Errorf("dwarf: %w", err)
}

r := d.Reader()
return dwarfparser{d: d, r: r}, nil
}

p := dwarfparser{d: d, r: r}
func newDwarfmapper(p dwarfparser) *dwarfmapper {
subprograms := p.Parse()
log.Printf("dwarf: parsed %d subprogramm ranges", len(subprograms))

dm := &dwarfmapper{
d: d,
return &dwarfmapper{
d: p.d,
subprograms: subprograms,
}

return dm, nil
}

type dwarfparser struct {
32 changes: 32 additions & 0 deletions mem.go
Original file line number Diff line number Diff line change
@@ -178,6 +178,38 @@ func (p *MemoryProfiler) NewHandler(sampleRate float64) http.Handler {
// compilers and libraries. It uses the function name to detect memory
// allocators, currently supporting libc, Go, and TinyGo.
func (p *MemoryProfiler) NewFunctionListener(def api.FunctionDefinition) experimental.FunctionListener {
if p.p.lang == python311 {
switch def.Name() {
// Raw domain
case "PyMem_RawMalloc":
return profilingListener{p.p, &mallocProfiler{memory: p}}
case "PyMem_RawCalloc":
return profilingListener{p.p, &callocProfiler{memory: p}}
case "PyMem_RawRealloc":
return profilingListener{p.p, &reallocProfiler{memory: p}}
case "PyMem_RawFree":
return profilingListener{p.p, &freeProfiler{memory: p}}
// Memory domain
case "PyMem_Malloc":
return profilingListener{p.p, &mallocProfiler{memory: p}}
case "PyMem_Calloc":
return profilingListener{p.p, &callocProfiler{memory: p}}
case "PyMem_Realloc":
return profilingListener{p.p, &reallocProfiler{memory: p}}
case "PyMem_Free":
return profilingListener{p.p, &freeProfiler{memory: p}}
// Object domain
case "PyObject_Malloc":
return profilingListener{p.p, &mallocProfiler{memory: p}}
case "PyObject_Calloc":
return profilingListener{p.p, &callocProfiler{memory: p}}
case "PyObject_Realloc":
return profilingListener{p.p, &reallocProfiler{memory: p}}
case "PyObject_Free":
return profilingListener{p.p, &freeProfiler{memory: p}}
}
return nil
}
switch def.Name() {
// C standard library, Rust
case "malloc":
Loading