Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

python support #95

Merged
merged 15 commits into from
Jul 12, 2023
5 changes: 4 additions & 1 deletion .github/workflows/build.yml
Original file line number Diff line number Diff line change
Expand Up @@ -63,5 +63,8 @@ jobs:
go-version-file: .go-version
check-latest: true

- name: Setup python
run: make python

- name: Test
run: go test -v ./...
run: go test ./...
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -21,3 +21,4 @@
testdata/rust/*/target/*
testdata/.sysroot/*
*.pb.gz
.python
16 changes: 14 additions & 2 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -18,15 +18,17 @@ testdata.files = \
$(testdata.tinygo.wasm) \
$(testdata.wat.wasm)

python.files = .python/python.wasm .python/python311.zip

all: test

clean:
rm -f $(testdata.files)
rm -f $(testdata.files) $(python.files)

test: testdata
go test ./...

testdata: wasi-libc $(testdata.files)
testdata: wasi-libc python $(testdata.files)

testdata/.sysroot:
mkdir -p testdata/.sysroot
Expand Down Expand Up @@ -54,6 +56,16 @@ testdata/wat/%.wasm: testdata/wat/%.wat

wasi-libc: testdata/.sysroot/lib/wasm32-wasi/libc.a

python: $(python.files)

.python/python.wasm:
mkdir -p $(dir $@)
curl -fsSL https://timecraft.s3.amazonaws.com/python-vanilla/main/python.wasm -o $@

.python/python311.zip:
mkdir -p $(dir $@)
curl -fsSL https://timecraft.s3.amazonaws.com/python-vanilla/main/python311.zip -o $@

.gitmodules:
git submodule add --name wasi-libc -- \
'https://github.com/WebAssembly/wasi-libc' testdata/.wasi-libc
32 changes: 32 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -186,6 +186,38 @@ The CPU time profiler measures the actual time spent on-CPU without taking into
account the off-CPU time (e.g waiting for I/O). For this profiler, all the
host-functions are considered off-CPU.

## Language support

wzprof runs some heuristics to assess what the guest module is running to adapt
the way it symbolizes and walks the stack. In all other cases, it defaults to
inspecting the wasm stack and uses DWARF information if present in the module.

### Golang

If the guest has been compiled by golang/go 1.21+, wzprof inspects the memory
to walk the Go stack, which provides full call stacks, instead of the shortened
versions you would get without this support.

In addition, wzprof parses pclntab to perform symbolization. This is the same
mechanism the Go runtime itself uses to display meaningful stack traces when a
panic occurs.

### Python 3.11

If the guest is CPython 3.11 and has been compiled with debug symbols (such as
[timecraft's][timecraft-python]), wzprof walks the Python interpreter call
stack, not the C stack it would otherwise report. This provides more meaningful
profiling information on the script being executed.

At the moment it does not support merging the C extension calls into the Python
interpreter stack.

Note that a current limitation of the implementation is that unloading or
reloading modules may result in an incorrect profile. If that's a problem for
you please file an issue in the github tracker.

[timecraft-python]: https://docs.timecraft.dev/getting-started/prep-application/compiling-python#preparing-python

## Contributing

Pull requests are welcome! Anything that is not a simple fix would probably
Expand Down
76 changes: 57 additions & 19 deletions cmd/wzprof/main_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,8 @@ import (
// that.

func TestDataCSimple(t *testing.T) {
testMemoryProfiler(t, "../../testdata/c/simple.wasm", []sample{
p := program{filePath: "../../testdata/c/simple.wasm"}
testMemoryProfiler(t, p, []sample{
{
[]int64{1, 10},
[]frame{
Expand Down Expand Up @@ -53,7 +54,8 @@ func TestDataCSimple(t *testing.T) {
}

func TestDataRustSimple(t *testing.T) {
testMemoryProfiler(t, "../../testdata/rust/simple/target/wasm32-wasi/debug/simple.wasm", []sample{
p := program{filePath: "../../testdata/rust/simple/target/wasm32-wasi/debug/simple.wasm"}
testMemoryProfiler(t, p, []sample{
{
[]int64{1, 120},
[]frame{
Expand Down Expand Up @@ -89,10 +91,52 @@ func TestDataRustSimple(t *testing.T) {
})
}

func TestPyTwoCalls(t *testing.T) {
pyd := t.TempDir()
pyzip := filepath.Join(pyd, "/usr/local/lib/python311.zip")
pyscript := filepath.Join(pyd, "script.py")
os.MkdirAll(filepath.Dir(pyzip), os.ModePerm)
os.Link("../../.python/python311.zip", pyzip)
os.Link("../../testdata/python/simple.py", pyscript)

p := program{
filePath: "../../.python/python.wasm",
args: []string{"/script.py"},
mounts: []string{pyd + ":/"},
}

testCpuProfiler(t, p, []sample{
{ // deepest script.py call stack
[]int64{1},
[]frame{
{"script.a", 2, false},
{"script.b", 7, false},
{"script.c", 11, false},
{"script", 15, false},
},
},
})

testMemoryProfiler(t, p, []sample{
// byterray(100) allocates 28 bytes for the object, and 100+1 byte for
// the content because in python byte arrays are null-terminated. It
// first calls PyObject_Malloc(28), then PyObject_Realloc(101).
{
[]int64{2, 129},
[]frame{
{"script.a", 3, false},
{"script.b", 7, false},
{"script.c", 11, false},
{"script", 15, false},
},
},
})
}

func TestGoTwoCalls(t *testing.T) {
wasm := "../../testdata/go/twocalls.wasm"
p := program{filePath: "../../testdata/go/twocalls.wasm"}

testCpuProfiler(t, wasm, []sample{
testCpuProfiler(t, p, []sample{
{ // first call to myalloc1() from main.
[]int64{1},
[]frame{
Expand Down Expand Up @@ -129,7 +173,7 @@ func TestGoTwoCalls(t *testing.T) {
},
})

testMemoryProfiler(t, wasm, []sample{
testMemoryProfiler(t, p, []sample{
{ // first call to myalloc1() from main.
[]int64{1, 41},
[]frame{
Expand Down Expand Up @@ -167,35 +211,29 @@ func TestGoTwoCalls(t *testing.T) {
})
}

func testCpuProfiler(t *testing.T, path string, expectedSamples []sample) {
prog := &program{
filePath: path,
sampleRate: 1,
cpuProfile: filepath.Join(t.TempDir(), "cpu.pprof"),
}
func testCpuProfiler(t *testing.T, prog program, expectedSamples []sample) {
prog.sampleRate = 1
prog.cpuProfile = filepath.Join(t.TempDir(), "cpu.pprof")

expectedTypes := []string{
"samples",
"cpu",
}

p := execForProfile(t, prog, prog.cpuProfile)
p := execForProfile(t, &prog, prog.cpuProfile)
assertSamples(t, expectedTypes, expectedSamples, p)
}

func testMemoryProfiler(t *testing.T, path string, expectedSamples []sample) {
prog := &program{
filePath: path,
sampleRate: 1,
memProfile: filepath.Join(t.TempDir(), "mem.pprof"),
}
func testMemoryProfiler(t *testing.T, prog program, expectedSamples []sample) {
prog.sampleRate = 1
prog.memProfile = filepath.Join(t.TempDir(), "mem.pprof")

expectedTypes := []string{
"alloc_objects",
"alloc_space",
}

p := execForProfile(t, prog, prog.memProfile)
p := execForProfile(t, &prog, prog.memProfile)
assertSamples(t, expectedTypes, expectedSamples, p)
}

Expand Down
9 changes: 8 additions & 1 deletion cpu.go
Original file line number Diff line number Diff line change
Expand Up @@ -188,7 +188,14 @@ func (p *CPUProfiler) NewHandler(sampleRate float64) http.Handler {
// NewFunctionListener returns a function listener suited to record CPU timings
// of calls to the function passed as argument.
func (p *CPUProfiler) NewFunctionListener(def api.FunctionDefinition) experimental.FunctionListener {
_, skip := p.p.filteredFunctions[def.Name()]
name := def.Name()
if len(p.p.onlyFunctions) > 0 {
_, keep := p.p.onlyFunctions[name]
if !keep {
return nil
}
}
_, skip := p.p.filteredFunctions[name]
if skip {
return nil
}
Expand Down
56 changes: 40 additions & 16 deletions dwarf.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,14 +11,13 @@ import (
"sync"

"github.com/tetratelabs/wazero"
"github.com/tetratelabs/wazero/api"
"github.com/tetratelabs/wazero/experimental"
)

// buildDwarfSymbolizer constructs a Symbolizer instance from the DWARF sections
// of the given WebAssembly module.
func buildDwarfSymbolizer(module wazero.CompiledModule) (symbolizer, error) {
return newDwarfmapper(module.CustomSections())
func buildDwarfSymbolizer(parser dwarfparser) symbolizer {
return newDwarfmapper(parser)
}

type sourceOffsetRange = [2]uint64
Expand All @@ -42,42 +41,67 @@ type dwarfmapper struct {
onceSourceOffsetNotFound sync.Once
}

func newDwarfmapper(sections []api.CustomSection) (*dwarfmapper, error) {
var info, line, ranges, str, abbrev []byte
const (
debugInfo = ".debug_info"
debugLine = ".debug_line"
debugStr = ".debug_str"
debugAbbrev = ".debug_abbrev"
debugRanges = ".debug_ranges"
)

func newDwarfparser(module wazero.CompiledModule) (dwarfparser, error) {
sections := module.CustomSections()

var info, line, ranges, str, abbrev []byte
for _, section := range sections {
log.Printf("dwarf: found section %s", section.Name())
switch section.Name() {
case ".debug_info":
case debugInfo:
info = section.Data()
case ".debug_line":
case debugLine:
line = section.Data()
case ".debug_str":
case debugStr:
str = section.Data()
case ".debug_abbrev":
case debugAbbrev:
abbrev = section.Data()
case ".debug_ranges":
case debugRanges:
ranges = section.Data()
}
}

d, err := dwarf.New(abbrev, nil, nil, info, line, nil, ranges, str)
if err != nil {
return nil, fmt.Errorf("dwarf: %w", err)
return dwarfparser{}, fmt.Errorf("dwarf: %w", err)
}

r := d.Reader()
return dwarfparser{d: d, r: r}, nil
}

func newDwarfParserFromBin(wasmbin []byte) (dwarfparser, error) {
info := wasmCustomSection(wasmbin, debugInfo)
line := wasmCustomSection(wasmbin, debugLine)
ranges := wasmCustomSection(wasmbin, debugRanges)
str := wasmCustomSection(wasmbin, debugStr)
abbrev := wasmCustomSection(wasmbin, debugAbbrev)

d, err := dwarf.New(abbrev, nil, nil, info, line, nil, ranges, str)
if err != nil {
return dwarfparser{}, fmt.Errorf("dwarf: %w", err)
}

r := d.Reader()
return dwarfparser{d: d, r: r}, nil
}

p := dwarfparser{d: d, r: r}
func newDwarfmapper(p dwarfparser) *dwarfmapper {
subprograms := p.Parse()
log.Printf("dwarf: parsed %d subprogramm ranges", len(subprograms))

dm := &dwarfmapper{
d: d,
return &dwarfmapper{
d: p.d,
subprograms: subprograms,
}

return dm, nil
}

type dwarfparser struct {
Expand Down
32 changes: 32 additions & 0 deletions mem.go
Original file line number Diff line number Diff line change
Expand Up @@ -178,6 +178,38 @@ func (p *MemoryProfiler) NewHandler(sampleRate float64) http.Handler {
// compilers and libraries. It uses the function name to detect memory
// allocators, currently supporting libc, Go, and TinyGo.
func (p *MemoryProfiler) NewFunctionListener(def api.FunctionDefinition) experimental.FunctionListener {
if p.p.lang == python311 {
switch def.Name() {
// Raw domain
case "PyMem_RawMalloc":
return profilingListener{p.p, &mallocProfiler{memory: p}}
case "PyMem_RawCalloc":
return profilingListener{p.p, &callocProfiler{memory: p}}
case "PyMem_RawRealloc":
return profilingListener{p.p, &reallocProfiler{memory: p}}
case "PyMem_RawFree":
return profilingListener{p.p, &freeProfiler{memory: p}}
// Memory domain
case "PyMem_Malloc":
return profilingListener{p.p, &mallocProfiler{memory: p}}
case "PyMem_Calloc":
return profilingListener{p.p, &callocProfiler{memory: p}}
case "PyMem_Realloc":
return profilingListener{p.p, &reallocProfiler{memory: p}}
case "PyMem_Free":
return profilingListener{p.p, &freeProfiler{memory: p}}
// Object domain
case "PyObject_Malloc":
return profilingListener{p.p, &mallocProfiler{memory: p}}
case "PyObject_Calloc":
return profilingListener{p.p, &callocProfiler{memory: p}}
case "PyObject_Realloc":
return profilingListener{p.p, &reallocProfiler{memory: p}}
case "PyObject_Free":
return profilingListener{p.p, &freeProfiler{memory: p}}
}
return nil
}
switch def.Name() {
// C standard library, Rust
case "malloc":
Expand Down
Loading
Loading