dispatchrun · pelletier · Jul 12, 2023 · Jul 7, 2023 · Jul 7, 2023 · Jul 10, 2023
diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml
@@ -63,5 +63,8 @@ jobs:
         go-version-file: .go-version
         check-latest: true
 
+    - name: Setup python
+      run: make python
+
     - name: Test
-      run: go test -v ./...
+      run: go test ./...
diff --git a/.gitignore b/.gitignore
@@ -21,3 +21,4 @@
 testdata/rust/*/target/*
 testdata/.sysroot/*
 *.pb.gz
+.python
diff --git a/Makefile b/Makefile
@@ -18,15 +18,17 @@ testdata.files = \
 	$(testdata.tinygo.wasm) \
 	$(testdata.wat.wasm)
 
+python.files = .python/python.wasm .python/python311.zip
+
 all: test
 
 clean:
-	rm -f $(testdata.files)
+	rm -f $(testdata.files) $(python.files)
 
 test: testdata
 	go test ./...
 
-testdata: wasi-libc $(testdata.files)
+testdata: wasi-libc python $(testdata.files)
 
 testdata/.sysroot:
 	mkdir -p testdata/.sysroot
@@ -54,6 +56,16 @@ testdata/wat/%.wasm: testdata/wat/%.wat
 
 wasi-libc: testdata/.sysroot/lib/wasm32-wasi/libc.a
 
+python: $(python.files)
+
+.python/python.wasm:
+	mkdir -p $(dir $@)
+	curl -fsSL https://timecraft.s3.amazonaws.com/python-vanilla/main/python.wasm -o $@
+
+.python/python311.zip:
+	mkdir -p $(dir $@)
+	curl -fsSL https://timecraft.s3.amazonaws.com/python-vanilla/main/python311.zip -o $@
+
 .gitmodules:
 	git submodule add --name wasi-libc -- \
 		'https://github.com/WebAssembly/wasi-libc' testdata/.wasi-libc
diff --git a/README.md b/README.md
@@ -186,6 +186,38 @@ The CPU time profiler measures the actual time spent on-CPU without taking into
 account the off-CPU time (e.g waiting for I/O). For this profiler, all the
 host-functions are considered off-CPU.
 
+## Language support
+
+wzprof uses heuristics to detect what the guest module is running and adapts
+how it symbolizes and walks the stack. If no supported runtime is detected, it
+inspects the wasm stack and uses DWARF information if present in the module.
+
+### Golang
+
+If the guest has been compiled by golang/go 1.21+, wzprof inspects the memory
+to walk the Go stack, which provides full call stacks, instead of the shortened
+versions you would get without this support.
+
+In addition, wzprof parses pclntab to perform symbolization. This is the same
+mechanism the Go runtime itself uses to display meaningful stack traces when a
+panic occurs.
+
+### Python 3.11
+
+If the guest is CPython 3.11 and has been compiled with debug symbols (such as
+[timecraft's][timecraft-python]), wzprof walks the Python interpreter call
+stack, not the C stack it would otherwise report. This provides more meaningful
+profiling information on the script being executed.
+
+At the moment it does not support merging the C extension calls into the Python
+interpreter stack.
+
+Note that a current limitation of the implementation is that unloading or
+reloading modules may result in an incorrect profile. If that's a problem for
+you, please file an issue in the GitHub issue tracker.
+
+[timecraft-python]: https://docs.timecraft.dev/getting-started/prep-application/compiling-python#preparing-python
+
 ## Contributing
 
 Pull requests are welcome! Anything that is not a simple fix would probably

diff --git a/cmd/wzprof/main_test.go b/cmd/wzprof/main_test.go
@@ -16,7 +16,8 @@ import (
 // that.
 
 func TestDataCSimple(t *testing.T) {
-	testMemoryProfiler(t, "../../testdata/c/simple.wasm", []sample{
+	p := program{filePath: "../../testdata/c/simple.wasm"}
+	testMemoryProfiler(t, p, []sample{
 		{
 			[]int64{1, 10},
 			[]frame{
@@ -53,7 +54,8 @@ func TestDataCSimple(t *testing.T) {
 }
 
 func TestDataRustSimple(t *testing.T) {
-	testMemoryProfiler(t, "../../testdata/rust/simple/target/wasm32-wasi/debug/simple.wasm", []sample{
+	p := program{filePath: "../../testdata/rust/simple/target/wasm32-wasi/debug/simple.wasm"}
+	testMemoryProfiler(t, p, []sample{
 		{
 			[]int64{1, 120},
 			[]frame{
@@ -89,10 +91,52 @@ func TestDataRustSimple(t *testing.T) {
 	})
 }
 
+func TestPyTwoCalls(t *testing.T) {
+	pyd := t.TempDir()
+	pyzip := filepath.Join(pyd, "/usr/local/lib/python311.zip")
+	pyscript := filepath.Join(pyd, "script.py")
+	os.MkdirAll(filepath.Dir(pyzip), os.ModePerm)
+	os.Link("../../.python/python311.zip", pyzip)
+	os.Link("../../testdata/python/simple.py", pyscript)
+
+	p := program{
+		filePath: "../../.python/python.wasm",
+		args:     []string{"/script.py"},
+		mounts:   []string{pyd + ":/"},
+	}
+
+	testCpuProfiler(t, p, []sample{
+		{ // deepest script.py call stack
+			[]int64{1},
+			[]frame{
+				{"script.a", 2, false},
+				{"script.b", 7, false},
+				{"script.c", 11, false},
+				{"script", 15, false},
+			},
+		},
+	})
+
+	testMemoryProfiler(t, p, []sample{
+		// bytearray(100) allocates 28 bytes for the object, and 100+1 bytes for
+		// the content because in Python byte arrays are null-terminated. It
+		// first calls PyObject_Malloc(28), then PyObject_Realloc(101).
+		{
+			[]int64{2, 129},
+			[]frame{
+				{"script.a", 3, false},
+				{"script.b", 7, false},
+				{"script.c", 11, false},
+				{"script", 15, false},
+			},
+		},
+	})
+}
+
 func TestGoTwoCalls(t *testing.T) {
-	wasm := "../../testdata/go/twocalls.wasm"
+	p := program{filePath: "../../testdata/go/twocalls.wasm"}
 
-	testCpuProfiler(t, wasm, []sample{
+	testCpuProfiler(t, p, []sample{
 		{ // first call to myalloc1() from main.
 			[]int64{1},
 			[]frame{
@@ -129,7 +173,7 @@ func TestGoTwoCalls(t *testing.T) {
 		},
 	})
 
-	testMemoryProfiler(t, wasm, []sample{
+	testMemoryProfiler(t, p, []sample{
 		{ // first call to myalloc1() from main.
 			[]int64{1, 41},
 			[]frame{
@@ -167,35 +211,29 @@ func TestGoTwoCalls(t *testing.T) {
 	})
 }
 
-func testCpuProfiler(t *testing.T, path string, expectedSamples []sample) {
-	prog := &program{
-		filePath:   path,
-		sampleRate: 1,
-		cpuProfile: filepath.Join(t.TempDir(), "cpu.pprof"),
-	}
+func testCpuProfiler(t *testing.T, prog program, expectedSamples []sample) {
+	prog.sampleRate = 1
+	prog.cpuProfile = filepath.Join(t.TempDir(), "cpu.pprof")
 
 	expectedTypes := []string{
 		"samples",
 		"cpu",
 	}
 
-	p := execForProfile(t, prog, prog.cpuProfile)
+	p := execForProfile(t, &prog, prog.cpuProfile)
 	assertSamples(t, expectedTypes, expectedSamples, p)
 }
 
-func testMemoryProfiler(t *testing.T, path string, expectedSamples []sample) {
-	prog := &program{
-		filePath:   path,
-		sampleRate: 1,
-		memProfile: filepath.Join(t.TempDir(), "mem.pprof"),
-	}
+func testMemoryProfiler(t *testing.T, prog program, expectedSamples []sample) {
+	prog.sampleRate = 1
+	prog.memProfile = filepath.Join(t.TempDir(), "mem.pprof")
 
 	expectedTypes := []string{
 		"alloc_objects",
 		"alloc_space",
 	}
 
-	p := execForProfile(t, prog, prog.memProfile)
+	p := execForProfile(t, &prog, prog.memProfile)
 	assertSamples(t, expectedTypes, expectedSamples, p)
 }
 

diff --git a/cpu.go b/cpu.go
@@ -188,7 +188,14 @@ func (p *CPUProfiler) NewHandler(sampleRate float64) http.Handler {
 // NewFunctionListener returns a function listener suited to record CPU timings
 // of calls to the function passed as argument.
 func (p *CPUProfiler) NewFunctionListener(def api.FunctionDefinition) experimental.FunctionListener {
-	_, skip := p.p.filteredFunctions[def.Name()]
+	name := def.Name()
+	if len(p.p.onlyFunctions) > 0 {
+		_, keep := p.p.onlyFunctions[name]
+		if !keep {
+			return nil
+		}
+	}
+	_, skip := p.p.filteredFunctions[name]
 	if skip {
 		return nil
 	}

diff --git a/dwarf.go b/dwarf.go
@@ -11,14 +11,13 @@ import (
 	"sync"
 
 	"github.com/tetratelabs/wazero"
-	"github.com/tetratelabs/wazero/api"
 	"github.com/tetratelabs/wazero/experimental"
 )
 
 // buildDwarfSymbolizer constructs a Symbolizer instance from the DWARF sections
 // of the given WebAssembly module.
-func buildDwarfSymbolizer(module wazero.CompiledModule) (symbolizer, error) {
-	return newDwarfmapper(module.CustomSections())
+func buildDwarfSymbolizer(parser dwarfparser) symbolizer {
+	return newDwarfmapper(parser)
 }
 
 type sourceOffsetRange = [2]uint64
@@ -42,42 +41,67 @@ type dwarfmapper struct {
 	onceSourceOffsetNotFound sync.Once
 }
 
-func newDwarfmapper(sections []api.CustomSection) (*dwarfmapper, error) {
-	var info, line, ranges, str, abbrev []byte
+const (
+	debugInfo   = ".debug_info"
+	debugLine   = ".debug_line"
+	debugStr    = ".debug_str"
+	debugAbbrev = ".debug_abbrev"
+	debugRanges = ".debug_ranges"
+)
 
+func newDwarfparser(module wazero.CompiledModule) (dwarfparser, error) {
+	sections := module.CustomSections()
+
+	var info, line, ranges, str, abbrev []byte
 	for _, section := range sections {
 		log.Printf("dwarf: found section %s", section.Name())
 		switch section.Name() {
-		case ".debug_info":
+		case debugInfo:
 			info = section.Data()
-		case ".debug_line":
+		case debugLine:
 			line = section.Data()
-		case ".debug_str":
+		case debugStr:
 			str = section.Data()
-		case ".debug_abbrev":
+		case debugAbbrev:
 			abbrev = section.Data()
-		case ".debug_ranges":
+		case debugRanges:
 			ranges = section.Data()
 		}
 	}
 
 	d, err := dwarf.New(abbrev, nil, nil, info, line, nil, ranges, str)
 	if err != nil {
-		return nil, fmt.Errorf("dwarf: %w", err)
+		return dwarfparser{}, fmt.Errorf("dwarf: %w", err)
+	}
+
+	r := d.Reader()
+	return dwarfparser{d: d, r: r}, nil
+}
+
+func newDwarfParserFromBin(wasmbin []byte) (dwarfparser, error) {
+	info := wasmCustomSection(wasmbin, debugInfo)
+	line := wasmCustomSection(wasmbin, debugLine)
+	ranges := wasmCustomSection(wasmbin, debugRanges)
+	str := wasmCustomSection(wasmbin, debugStr)
+	abbrev := wasmCustomSection(wasmbin, debugAbbrev)
+
+	d, err := dwarf.New(abbrev, nil, nil, info, line, nil, ranges, str)
+	if err != nil {
+		return dwarfparser{}, fmt.Errorf("dwarf: %w", err)
 	}
 
 	r := d.Reader()
+	return dwarfparser{d: d, r: r}, nil
+}
 
-	p := dwarfparser{d: d, r: r}
+func newDwarfmapper(p dwarfparser) *dwarfmapper {
 	subprograms := p.Parse()
 	log.Printf("dwarf: parsed %d subprogramm ranges", len(subprograms))
 
-	dm := &dwarfmapper{
-		d:           d,
+	return &dwarfmapper{
+		d:           p.d,
 		subprograms: subprograms,
 	}
-
-	return dm, nil
 }
 
 type dwarfparser struct {

diff --git a/mem.go b/mem.go
@@ -178,6 +178,38 @@ func (p *MemoryProfiler) NewHandler(sampleRate float64) http.Handler {
 // compilers and libraries. It uses the function name to detect memory
 // allocators, currently supporting libc, Go, and TinyGo.
 func (p *MemoryProfiler) NewFunctionListener(def api.FunctionDefinition) experimental.FunctionListener {
+	if p.p.lang == python311 {
+		switch def.Name() {
+		// Raw domain
+		case "PyMem_RawMalloc":
+			return profilingListener{p.p, &mallocProfiler{memory: p}}
+		case "PyMem_RawCalloc":
+			return profilingListener{p.p, &callocProfiler{memory: p}}
+		case "PyMem_RawRealloc":
+			return profilingListener{p.p, &reallocProfiler{memory: p}}
+		case "PyMem_RawFree":
+			return profilingListener{p.p, &freeProfiler{memory: p}}
+		// Memory domain
+		case "PyMem_Malloc":
+			return profilingListener{p.p, &mallocProfiler{memory: p}}
+		case "PyMem_Calloc":
+			return profilingListener{p.p, &callocProfiler{memory: p}}
+		case "PyMem_Realloc":
+			return profilingListener{p.p, &reallocProfiler{memory: p}}
+		case "PyMem_Free":
+			return profilingListener{p.p, &freeProfiler{memory: p}}
+		// Object domain
+		case "PyObject_Malloc":
+			return profilingListener{p.p, &mallocProfiler{memory: p}}
+		case "PyObject_Calloc":
+			return profilingListener{p.p, &callocProfiler{memory: p}}
+		case "PyObject_Realloc":
+			return profilingListener{p.p, &reallocProfiler{memory: p}}
+		case "PyObject_Free":
+			return profilingListener{p.p, &freeProfiler{memory: p}}
+		}
+		return nil
+	}
 	switch def.Name() {
 	// C standard library, Rust
 	case "malloc":
-Original file line number
+Diff line change
@@ @@ -21,3 +21,4 @@ @@
     testdata/rust/*/target/*
     testdata/.sysroot/*
     *.pb.gz
+    .python