From 4e3c110b3e6bc760d318090af6bd1aac7945cf67 Mon Sep 17 00:00:00 2001 From: Thomas Pelletier Date: Fri, 7 Jul 2023 14:26:47 -0400 Subject: [PATCH 01/15] wip: python support --- cpu.go | 13 +- dwarf.go | 23 ++-- mem.go | 1 + memory.go | 56 ++++++-- pclntab.go | 96 +++++++------- python.go | 368 +++++++++++++++++++++++++++++++++++++++++++++++++++ traceback.go | 48 +++---- wzprof.go | 30 ++++- 8 files changed, 539 insertions(+), 96 deletions(-) create mode 100644 python.go diff --git a/cpu.go b/cpu.go index 62ba3e3..2bdeb3a 100644 --- a/cpu.go +++ b/cpu.go @@ -188,7 +188,14 @@ func (p *CPUProfiler) NewHandler(sampleRate float64) http.Handler { // NewFunctionListener returns a function listener suited to record CPU timings // of calls to the function passed as argument. func (p *CPUProfiler) NewFunctionListener(def api.FunctionDefinition) experimental.FunctionListener { - _, skip := p.p.filteredFunctions[def.Name()] + name := def.Name() + if len(p.p.onlyFunctions) > 0 { + _, keep := p.p.onlyFunctions[name] + if !keep { + return nil + } + } + _, skip := p.p.filteredFunctions[name] if skip { return nil } @@ -197,7 +204,10 @@ func (p *CPUProfiler) NewFunctionListener(def api.FunctionDefinition) experiment type cpuProfiler struct{ *CPUProfiler } +var mydepth = 0 + func (p cpuProfiler) Before(ctx context.Context, mod api.Module, def api.FunctionDefinition, _ []uint64, si experimental.StackIterator) { + mydepth++ var frame cpuTimeFrame p.mutex.Lock() @@ -222,6 +232,7 @@ func (p cpuProfiler) Before(ctx context.Context, mod api.Module, def api.Functio } func (p cpuProfiler) After(ctx context.Context, mod api.Module, def api.FunctionDefinition, _ []uint64) { + mydepth-- i := len(p.frames) - 1 f := p.frames[i] p.frames = p.frames[:i] diff --git a/dwarf.go b/dwarf.go index 6114b18..8f0a016 100644 --- a/dwarf.go +++ b/dwarf.go @@ -11,14 +11,13 @@ import ( "sync" "github.com/tetratelabs/wazero" - "github.com/tetratelabs/wazero/api" "github.com/tetratelabs/wazero/experimental" ) // 
buildDwarfSymbolizer constructs a Symbolizer instance from the DWARF sections // of the given WebAssembly module. -func buildDwarfSymbolizer(module wazero.CompiledModule) (symbolizer, error) { - return newDwarfmapper(module.CustomSections()) +func buildDwarfSymbolizer(parser dwarfparser) symbolizer { + return newDwarfmapper(parser) } type sourceOffsetRange = [2]uint64 @@ -42,9 +41,10 @@ type dwarfmapper struct { onceSourceOffsetNotFound sync.Once } -func newDwarfmapper(sections []api.CustomSection) (*dwarfmapper, error) { - var info, line, ranges, str, abbrev []byte +func newDwarfparser(module wazero.CompiledModule) (dwarfparser, error) { + sections := module.CustomSections() + var info, line, ranges, str, abbrev []byte for _, section := range sections { log.Printf("dwarf: found section %s", section.Name()) switch section.Name() { @@ -63,21 +63,22 @@ func newDwarfmapper(sections []api.CustomSection) (*dwarfmapper, error) { d, err := dwarf.New(abbrev, nil, nil, info, line, nil, ranges, str) if err != nil { - return nil, fmt.Errorf("dwarf: %w", err) + return dwarfparser{}, fmt.Errorf("dwarf: %w", err) } r := d.Reader() - p := dwarfparser{d: d, r: r} + return dwarfparser{d: d, r: r}, nil +} + +func newDwarfmapper(p dwarfparser) *dwarfmapper { subprograms := p.Parse() log.Printf("dwarf: parsed %d subprogramm ranges", len(subprograms)) - dm := &dwarfmapper{ - d: d, + return &dwarfmapper{ + d: p.d, subprograms: subprograms, } - - return dm, nil } type dwarfparser struct { diff --git a/mem.go b/mem.go index 6042b0f..3e6d4c0 100644 --- a/mem.go +++ b/mem.go @@ -178,6 +178,7 @@ func (p *MemoryProfiler) NewHandler(sampleRate float64) http.Handler { // compilers and libraries. It uses the function name to detect memory // allocators, currently supporting libc, Go, and TinyGo. 
func (p *MemoryProfiler) NewFunctionListener(def api.FunctionDefinition) experimental.FunctionListener { + return nil // TODO switch def.Name() { // C standard library, Rust case "malloc": diff --git a/memory.go b/memory.go index b18455a..bb6e495 100644 --- a/memory.go +++ b/memory.go @@ -8,10 +8,26 @@ import ( "unsafe" ) -// ptr represents an address in the guest memory. It replaces unintptr in the -// original unwinder code. Here, the unwinder executes in the host, so this type -// helps to avoid dereferencing the host memory. -type ptr uint64 +// ptr64 represents a 64-bits address in the guest memory. It replaces unintptr +// in the original unwinder code. Here, the unwinder executes in the host, so +// this type helps to avoid dereferencing the host memory. +type ptr64 uint64 + +func (p ptr64) addr() uint32 { + return uint32(p) +} + +// ptr32 represents a 32-bits address in the guest memory. It replaces pointers +// in clang-wasi generated code. +type ptr32 uint32 + +func (p ptr32) addr() uint32 { + return uint32(p) +} + +type ptr interface { + addr() uint32 +} // vmem is the minimum interface required for virtual memory accesses in this // package. Is is used to read guest memory and rebuild the constructs needed @@ -26,6 +42,10 @@ type ptr uint64 // uintptr/unsafe.Pointer are used to manipulate memory seen by the host, and // ptr is used to represent memory inside the guest. type vmem interface { + // Read returns a view of the size bytes at the given virtual + // address, or false if the requested bytes are out of range. + // Users of this output need not modify the bytes, and make a copy + // of them if they wish to persist the data. 
Read(address, size uint32) ([]byte, bool) } @@ -36,20 +56,37 @@ type vmem interface { func deref[T any](r vmem, p ptr) T { var t T s := uint32(unsafe.Sizeof(t)) - b, ok := r.Read(uint32(p), s) + b, ok := r.Read(p.addr(), s) if !ok { panic(fmt.Errorf("invalid virtual memory read at %#x size %d", p, s)) } return *(*T)(unsafe.Pointer((unsafe.SliceData(b)))) } +// derefArray returns a host copy of the n contiguous elements of type T +// starting at the virtual address p. It panics if the read is out of range. +func derefArray[T any](r vmem, p ptr, n uint32) []T { + var t T + s := uint32(unsafe.Sizeof(t)) * n + view, ok := r.Read(p.addr(), s) + if !ok { + panic(fmt.Errorf("invalid virtual memory array read at %#x size %d", p, s)) + } + + outb := make([]byte, s) + copy(outb, view) + x := (*T)(unsafe.Pointer(unsafe.SliceData(outb))) + return unsafe.Slice(x, n) +} + // derefGoSlice takes a slice whose data pointer targets the guest memory, and // returns a copy the slice's contents in host memory. It is not recursive. Cap -// is set to Len, no matter its initial value. +// is set to Len, no matter its initial value. Assumes the underlying pointer is +// 64-bits. func derefGoSlice[T any](r vmem, s []T) []T { count := len(s) sh := (*reflect.SliceHeader)(unsafe.Pointer(&s)) - dp := ptr(sh.Data) + dp := ptr64(sh.Data) res := make([]T, count) for i := 0; i < count; i++ { res[i] = derefArrayIndex[T](r, dp, int32(i)) @@ -60,6 +97,7 @@ func derefGoSlice[T any](r vmem, s []T) []T { // Reads the i-th element of an array that starts at address p. 
func derefArrayIndex[T any](r vmem, p ptr, i int32) T { var t T - s := ptr(unsafe.Sizeof(t)) - return deref[T](r, p+ptr(i)*s) + a := p.addr() + s := uint32(unsafe.Sizeof(t)) + return deref[T](r, ptr32(a+uint32(i)*s)) } diff --git a/pclntab.go b/pclntab.go index 8c58d4d..2cab76d 100644 --- a/pclntab.go +++ b/pclntab.go @@ -177,7 +177,7 @@ func preparePclntabSymbolizer(wasmbin []byte, mod wazero.CompiledModule) (*pclnt return &pclntab{ imported: uint64(len(mod.ImportedFunctions())), modName: mod.Name(), - datap: ptr(mdaddr), + datap: ptr64(mdaddr), }, nil } @@ -209,7 +209,7 @@ func (f *_func) isInlined() bool { // by the first ptr. type funcinl struct { ones uint32 // set to ^0 to distinguish from _func - entry ptr // entry of the real (the "outermost") frame + entry ptr64 // entry of the real (the "outermost") frame name string file string line int32 @@ -245,7 +245,7 @@ func (f funcInfo) valid() bool { return f._func != nil } -func (f funcInfo) entry() ptr { +func (f funcInfo) entry() ptr64 { return f.md.textAddr(f.EntryOff) } @@ -257,7 +257,7 @@ func (f funcInfo) name() string { // source code corresponding to the program counter pc. // The result will not be accurate if pc is not a program // counter within f. -func (f funcInfo) fileLine(pc ptr) (file string, line int) { +func (f funcInfo) fileLine(pc ptr64) (file string, line int) { fn := f._func if fn.isInlined() { // inlined version fi := (*funcinl)(unsafe.Pointer(fn)) @@ -269,7 +269,7 @@ func (f funcInfo) fileLine(pc ptr) (file string, line int) { return file, int(line32) } -func funcline1(f funcInfo, targetpc ptr) (file string, line int32) { +func funcline1(f funcInfo, targetpc ptr64) (file string, line int32) { datap := f.md if !f.valid() { return "?", 0 @@ -306,7 +306,7 @@ func funcfile(f funcInfo, fileno int32) string { // derefs. 
type pclntabOff uint32 -func pcdatavalue1(f funcInfo, table uint32, targetpc ptr) int32 { +func pcdatavalue1(f funcInfo, table uint32, targetpc ptr64) int32 { if table >= f.Npcdata { return -1 } @@ -342,7 +342,7 @@ type pclntab struct { modName string // Virtual address of the firstmoduledata structure. Named like this for // similarity with the Go implementation. - datap ptr + datap ptr64 mem vmem md moduledata @@ -367,7 +367,7 @@ func (p *pclntab) EnsureReady(mem vmem) { // // TODO: support multiple go modules. // TODO: cache this, as it's on the hot path. -func (p *pclntab) FindFunc(pc ptr) funcInfo { +func (p *pclntab) FindFunc(pc ptr64) funcInfo { if pc < p.md.minpc || pc >= p.md.maxpc { return funcInfo{} } @@ -382,11 +382,11 @@ func (p *pclntab) FindFunc(pc ptr) funcInfo { return funcInfo{} } - x := ptr(pcOff) + p.md.text - p.md.minpc + x := ptr64(pcOff) + p.md.text - p.md.minpc b := x / pcbucketsize i := x % pcbucketsize / (pcbucketsize / nsub) - ffb := deref[findfuncbucket](p.mem, p.md.findfunctab+b*ptr(unsafe.Sizeof(findfuncbucket{}))) + ffb := deref[findfuncbucket](p.mem, p.md.findfunctab+b*ptr64(unsafe.Sizeof(findfuncbucket{}))) idx := ffb.idx + uint32(ffb.subbuckets[i]) @@ -412,7 +412,7 @@ func (p *pclntab) Locations(gofunc experimental.InternalFunction, pc experimenta var calleeFuncID goruntime.FuncID - iu, uf := newInlineUnwinder(p, f.mem, f.info, symPC(f.info, ptr(pc))) + iu, uf := newInlineUnwinder(p, f.mem, f.info, symPC(f.info, ptr64(pc))) for ; uf.valid(); uf = iu.next(uf) { sf := iu.srcFunc(uf) if sf.funcID == goruntime.FuncIDWrapper && elideWrapperCalling(calleeFuncID) { @@ -450,7 +450,7 @@ func (p *pclntab) Locations(gofunc experimental.InternalFunction, pc experimenta // can be at function entry when the frame is initialized without actually // running code, like in runtime.mstart, in which case this returns frame.pc // because that's the best we can do. 
-func symPC(fn funcInfo, pc ptr) ptr { +func symPC(fn funcInfo, pc ptr64) ptr64 { if pc > fn.entry() { // Regular call. return pc - 1 @@ -462,15 +462,15 @@ func symPC(fn funcInfo, pc ptr) ptr { // https://github.com/golang/go/blob/4859392cc29a35a0126e249ecdedbd022c755b20/src/cmd/link/internal/wasm/asm.go#L45 const funcValueOffset = 0x1000 -func (p *pclntab) PCToFID(pc ptr) fid { +func (p *pclntab) PCToFID(pc ptr64) fid { return fid(uint64(pc)>>16 + p.imported - funcValueOffset) } -func (p *pclntab) FIDToPC(f fid) ptr { - return ptr((funcValueOffset + f - fid(p.imported)) << 16) +func (p *pclntab) FIDToPC(f fid) ptr64 { + return ptr64((funcValueOffset + f - fid(p.imported)) << 16) } -func (p *pclntab) PCToName(pc ptr) string { +func (p *pclntab) PCToName(pc ptr64) string { f := p.FindFunc(pc) if !f.valid() { return "" @@ -478,7 +478,7 @@ func (p *pclntab) PCToName(pc ptr) string { return f.name() } -func (p *pclntab) PCToLine(pc ptr) (file string, line int, f funcInfo) { +func (p *pclntab) PCToLine(pc ptr64) (file string, line int, f funcInfo) { f = p.FindFunc(pc) if !f.valid() { return @@ -491,7 +491,7 @@ func (p *pclntab) PCToLine(pc ptr) (file string, line int, f funcInfo) { // ptr, but is a separate type to avoid confusion between the two. The main // difference is a gptr is not supposed to have arithmetic done on it outside // rtmem. Also, easier to replace guintptr with a dedicated type. 
-type gptr ptr +type gptr ptr64 // Layout of g struct: // @@ -529,8 +529,8 @@ type gptr ptr // goSigStack and sigmask are 0 because // https://github.com/golang/go/blob/b950cc8f11dc31cc9f6cfbed883818a7aa3abe94/src/runtime/os_wasm.go#L132 -func gM(m vmem, g gptr) ptr { - return deref[ptr](m, ptr(g)+8*6) +func gM(m vmem, g gptr) ptr64 { + return deref[ptr64](m, ptr64(g)+8*6) } func gMG0(m vmem, g gptr) gptr { @@ -541,16 +541,16 @@ func gMCurg(m vmem, g gptr) gptr { return deref[gptr](m, gM(m, g)+144) } -func gSchedSp(m vmem, g gptr) ptr { - return deref[ptr](m, ptr(g)+8*7) +func gSchedSp(m vmem, g gptr) ptr64 { + return deref[ptr64](m, ptr64(g)+8*7) } -func gSchedPc(m vmem, g gptr) ptr { - return deref[ptr](m, ptr(g)+8*8) +func gSchedPc(m vmem, g gptr) ptr64 { + return deref[ptr64](m, ptr64(g)+8*8) } -func gSchedLr(m vmem, g gptr) ptr { - return deref[ptr](m, ptr(g)+8*12) +func gSchedLr(m vmem, g gptr) ptr64 { + return deref[ptr64](m, ptr64(g)+8*12) } // goStackIterator iterates over the physical frames of the Go stack. It is up @@ -559,7 +559,7 @@ func gSchedLr(m vmem, g gptr) ptr { type goStackIterator struct { first bool pclntab *pclntab - pc ptr + pc ptr64 unwinder } @@ -616,7 +616,7 @@ type goFunction struct { mem vmem sym *pclntab info funcInfo - pc ptr + pc ptr64 api.FunctionDefinition // required for WazeroOnly } @@ -681,9 +681,9 @@ type functab struct { // Mapping information for secondary text sections. type textsect struct { - vaddr ptr // prelinked section vaddr - end ptr // vaddr + section length - baseaddr ptr // relocated section address + vaddr ptr64 // prelinked section vaddr + end ptr64 // vaddr + section length + baseaddr ptr64 // relocated section address } // findfuncbucket is an array of these structures. @@ -704,25 +704,25 @@ type findfuncbucket struct { // update derefModuleData accordingly. 
// nolint:unused type moduledata struct { - pcHeader ptr + pcHeader ptr64 funcnametab []byte cutab []uint32 filetab []byte pctab []byte pclntable []byte ftab []functab - findfunctab ptr - minpc, maxpc ptr - text, etext ptr - noptrdata, enoptrdata ptr - data, edata ptr - bss, ebss ptr - noptrbss, enoptrbss ptr - covctrs, ecovctrs ptr - end, gcdata, gcbss ptr - types, etypes ptr - rodata ptr - gofunc ptr // go.func.* + findfunctab ptr64 + minpc, maxpc ptr64 + text, etext ptr64 + noptrdata, enoptrdata ptr64 + data, edata ptr64 + bss, ebss ptr64 + noptrbss, enoptrbss ptr64 + covctrs, ecovctrs ptr64 + end, gcdata, gcbss ptr64 + types, etypes ptr64 + rodata ptr64 + gofunc ptr64 // go.func.* textsectmap []textsect // more fields we don't care about now. // ... @@ -750,7 +750,7 @@ func cstring(b []byte) string { // textOff is the opposite of textAddr. It converts a PC to a (virtual) offset // to md.text, and returns if the PC is in any Go text section. -func (md moduledata) textOff(pc ptr) (uint32, bool) { +func (md moduledata) textOff(pc ptr64) (uint32, bool) { res := uint32(pc - md.text) if len(md.textsectmap) > 1 { for i, sect := range md.textsectmap { @@ -786,8 +786,8 @@ func (md moduledata) textOff(pc ptr) (uint32, bool) { // compared against the section vaddrs and ends to determine the containing // section. Then the section relative offset is added to the section's relocated // baseaddr to compute the function address. -func (md moduledata) textAddr(off32 uint32) ptr { - off := ptr(off32) +func (md moduledata) textAddr(off32 uint32) ptr64 { + off := ptr64(off32) res := md.text + off if len(md.textsectmap) > 1 { for i, sect := range md.textsectmap { @@ -802,7 +802,7 @@ func (md moduledata) textAddr(off32 uint32) ptr { } // Retrieve module data from memory, including slices. 
-func derefModuledata(mem vmem, addr ptr) moduledata { +func derefModuledata(mem vmem, addr ptr64) moduledata { m := deref[moduledata](mem, addr) m.funcnametab = derefGoSlice(mem, m.funcnametab) m.cutab = derefGoSlice(mem, m.cutab) diff --git a/python.go b/python.go new file mode 100644 index 0000000..e844cdd --- /dev/null +++ b/python.go @@ -0,0 +1,368 @@ +package wzprof + +import ( + "debug/dwarf" + "encoding/binary" + "fmt" + "unsafe" + + "github.com/tetratelabs/wazero/api" + "github.com/tetratelabs/wazero/experimental" +) + +// Heuristic to guess whether the wasm binary is actually CPython, based on its +// DWARF information. +// +// It loops over compile units to find one named "Programs/python.c". It should +// be fast since it's the first compile unit when we build CPython. +func guessPython(p dwarfparser) bool { + for { + ent, err := p.r.Next() + if err != nil || ent == nil { + break + } + if ent.Tag != dwarf.TagCompileUnit { + p.r.SkipChildren() + continue + } + name, _ := ent.Val(dwarf.AttrName).(string) + if name == "Programs/python.c" { + return true + } + p.r.SkipChildren() + } + return false +} + +type python struct { + dwarf dwarfparser + pyrtaddr ptr32 + + counter uint64 +} + +func preparePython(dwarf dwarfparser) (*python, error) { + pyrtaddr := findPyRuntime(dwarf) + if pyrtaddr == 0 { + return nil, fmt.Errorf("could not find _PyRuntime address") + } + return &python{ + dwarf: dwarf, + pyrtaddr: ptr32(pyrtaddr), + }, nil +} + +// Find the address of the _PyRuntime symbol from the dwarf information. +// Returns 0 if not found. 
+func findPyRuntime(p dwarfparser) uint32 { + for { + ent, err := p.r.Next() + if err != nil || ent == nil { + break + } + if ent.Tag != dwarf.TagVariable { + continue + } + name, _ := ent.Val(dwarf.AttrName).(string) + if name != "_PyRuntime" { + continue + } + f := ent.AttrField(dwarf.AttrLocation) + if f == nil { + panic("_PyRuntime does not have a location") + } + if f.Class != dwarf.ClassExprLoc { + panic(fmt.Errorf("invalid location class: %s", f.Class)) + } + const DW_OP_addr = 0x3 + loc := f.Val.([]byte) + if len(loc) < 5 || loc[0] != DW_OP_addr { // opcode byte plus the 4 address bytes read below + panic(fmt.Errorf("unexpected address format: %X", loc)) + } + return binary.LittleEndian.Uint32(loc[1:]) + } + return 0 +} + +// Padding of fields in various CPython structs. They are calculated +// by writing a function in any CPython module, and executing it with +// wazero. +// +// TODO: look into using CGO and #import to generate them +// instead. +const ( + // _PyRuntimeState + padTstateCurrentInRT = 360 + // PyThreadState + padCframeInThreadState = 40 + // _PyCFrame + padCurrentFrameInCFrame = 4 + // _PyInterpreterFrame + padPreviousInFrame = 24 + padCodeInFrame = 16 + padPrevInstrInFrame = 28 + padOwnerInFrame = 37 + // PyCodeObject + padFilenameInCodeObject = 80 + padNameInCodeObject = 84 + padCodeAdaptiveInCodeObject = 116 + padFirstlinenoInCodeObject = 48 + padLinearrayInCodeObject = 104 + padLinetableInCodeObject = 92 + padFirstTraceableInCodeObject = 108 + sizeCodeUnit = 2 + // PyASCIIObject + padStateInAsciiObject = 16 + padLengthInAsciiObject = 8 + sizeAsciiObject = 24 + // PyBytesObject + padSvalInBytesObject = 16 + padSizeInBytesObject = 8 + // Enum constants + enumCodeLocation1 = 11 + enumCodeLocation2 = 12 + enumCodeLocationNoCol = 13 + enumCodeLocationLong = 14 + enumFrameOwnedByGenerator = 1 +) + +func (p *python) Locations(fn experimental.InternalFunction, pc experimental.ProgramCounter) (uint64, []location) { + call := fn.(pyfuncall) + + loc := location{ + File: call.file, + Line: 
int64(call.line), + HumanName: call.name, + StableName: call.name, + } + + return uint64(call.addr), []location{loc} +} + +func (p *python) Stackiter(mod api.Module, def api.FunctionDefinition, wasmsi experimental.StackIterator) experimental.StackIterator { + m := mod.Memory() + tsp := deref[ptr32](m, p.pyrtaddr+padTstateCurrentInRT) + cframep := deref[ptr32](m, tsp+padCframeInThreadState) + framep := deref[ptr32](m, cframep+padCurrentFrameInCFrame) + + return &pystackiter{ + namedbg: def.DebugName(), + counter: &p.counter, + mem: m, + framep: framep, + } +} + +type pystackiter struct { + namedbg string + counter *uint64 + mem api.Memory + started bool + framep ptr32 // _PyInterpreterFrame* +} + +func (p *pystackiter) Next() bool { + if !p.started { + p.started = true + return p.framep != 0 + } + + oldframe := p.framep + p.framep = deref[ptr32](p.mem, p.framep+padPreviousInFrame) + if oldframe == p.framep { + fmt.Printf("frame previous field pointer to the same frame: %x", p.framep) + p.framep = 0 + return false + } + return p.framep != 0 +} + +func (p *pystackiter) ProgramCounter() experimental.ProgramCounter { + *p.counter += 1 + return experimental.ProgramCounter(*p.counter) +} + +func (p *pystackiter) Function() experimental.InternalFunction { + codep := deref[ptr32](p.mem, p.framep+padCodeInFrame) + return pyfuncall{ + file: derefPyUnicodeUtf8(p.mem, codep+padFilenameInCodeObject), + name: derefPyUnicodeUtf8(p.mem, codep+padNameInCodeObject), + addr: deref[uint32](p.mem, p.framep+padPrevInstrInFrame), + line: lineForFrame(p.mem, p.framep, codep), + } +} + +func (p *pystackiter) Parameters() []uint64 { + panic("TODO parameters()") +} + +// pyfuncall represent a specific place in the python source where a +// function call occurred. 
+type pyfuncall struct { + file string + name string + line int32 + addr uint32 + + api.FunctionDefinition // required for WazeroOnly +} + +func (f pyfuncall) Definition() api.FunctionDefinition { + return f +} + +func (f pyfuncall) SourceOffsetForPC(pc experimental.ProgramCounter) uint64 { + panic("does not make sense") +} + +func (f pyfuncall) ModuleName() string { + return "" // TODO +} + +func (f pyfuncall) Index() uint32 { + return 42 // TODO +} + +func (f pyfuncall) Import() (string, string, bool) { + panic("implement me") +} + +func (f pyfuncall) ExportNames() []string { + panic("implement me") +} + +func (f pyfuncall) Name() string { + return f.name +} + +func (f pyfuncall) DebugName() string { + return f.name +} + +func (f pyfuncall) GoFunction() interface{} { + return nil +} + +func (f pyfuncall) ParamTypes() []api.ValueType { + panic("implement me") +} + +func (f pyfuncall) ParamNames() []string { + panic("implement me") +} + +func (f pyfuncall) ResultTypes() []api.ValueType { + panic("implement me") +} + +func (f pyfuncall) ResultNames() []string { + panic("implement me") +} + +// Return the utf8 encoding of a PyUnicode object. It is a +// re-implementation of PyUnicode_AsUTF8. The bytes are copied from +// the vmem, so the returned string is safe to use. 
+func pyUnicodeUTf8(m vmem, p ptr32) string { + statep := p + padStateInAsciiObject + state := deref[uint8](m, statep) + compact := state&(1<<5) > 0 + ascii := state&(1<<6) > 0 + if !compact || !ascii { + panic("only support ascii-compact utf8 representation") + } + + length := deref[int32](m, p+padLengthInAsciiObject) + bytes := derefArray[byte](m, p+sizeAsciiObject, uint32(length)) + return unsafe.String(unsafe.SliceData(bytes), len(bytes)) +} + +func derefPyUnicodeUtf8(m vmem, p ptr32) string { + x := deref[ptr32](m, p) + return pyUnicodeUTf8(m, x) +} + +func lineForFrame(m vmem, framep, codep ptr32) int32 { + codestart := codep + padCodeAdaptiveInCodeObject + previnstr := deref[ptr32](m, framep+padPrevInstrInFrame) + firstlineno := deref[int32](m, codep+padFirstlinenoInCodeObject) + + if previnstr < codestart { + return firstlineno + } + + linearray := deref[ptr32](m, codep+padLinearrayInCodeObject) + if linearray != 0 { + fmt.Println("LINEARRAY PANIC") + panic("can't handle code sections with line arrays") + } + + codebytes := deref[ptr32](m, codep+padLinetableInCodeObject) + if codebytes == 0 { + fmt.Println("CODEBYTES PANIC") + panic("code section must have a linetable") + } + + length := deref[int32](m, codebytes+padSizeInBytesObject) + linetable := codebytes + padSvalInBytesObject + addrq := int32(previnstr - codestart) + + lo_next := linetable // pointer to the current byte in the line table + limit := lo_next + ptr32(length) // pointer to the end of the linetable + ar_end := int32(0) // offset into the code section + computed_line := firstlineno // current known line number + ar_line := int32(-1) // line for the current bytecode + + for ar_end <= addrq && lo_next < limit { + lineDelta := int32(0) + ptr := lo_next + + entry := deref[uint8](m, ptr) + code := (entry >> 3) & 15 + switch code { + case enumCodeLocation1: + lineDelta = 1 + case enumCodeLocation2: + lineDelta = 2 + case enumCodeLocationNoCol, enumCodeLocationLong: + lineDelta = pysvarint(m, 
ptr+1) + } + + computed_line += lineDelta + + if (entry >> 3) == 0x1F { + ar_line = -1 + } else { + ar_line = computed_line + } + + ar_end += (int32(entry&7) + 1) * sizeCodeUnit + + lo_next++ + for lo_next < limit && (deref[uint8](m, lo_next)&128 == 0) { + lo_next++ + } + } + + return ar_line +} + +// pysvarint decodes a signed varint of a CPython location table: 6 value bits +// per byte, bit 6 flags a continuation byte, and bit 0 of the decoded value is +// the sign (sign-magnitude, not zigzag). Bit 7 marks entry starts, not data. +func pysvarint(m vmem, p ptr32) int32 { + read := deref[uint8](m, p) + val := uint32(read & 63) + shift := 0 + for read&64 > 0 { + p++ + read = deref[uint8](m, p) + shift += 6 + val |= uint32(read&63) << shift + } + + x := int32(val >> 1) + if val&1 > 0 { + x = -x + } + return x +} diff --git a/traceback.go b/traceback.go index d0d7c9f..d71c94f 100644 --- a/traceback.go +++ b/traceback.go @@ -70,11 +70,11 @@ type stkframe struct { // instruction in a function. Conventionally, we use pc-1 // for symbolic information, unless pc == fn.entry(), in // which case we use pc. - pc ptr - lr ptr // program counter at caller aka link register - sp ptr // stack pointer at pc - fp ptr // stack pointer at caller aka frame pointer - varp ptr // top of local variables + pc ptr64 + lr ptr64 // program counter at caller aka link register + sp ptr64 // stack pointer at pc + fp ptr64 // stack pointer at caller aka frame pointer + varp ptr64 // top of local variables } // unwindFlags control the behavior of various unwinders. 
@@ -155,8 +155,8 @@ const ( sysPCQuantum = 1 // https://github.com/golang/go/blob/49ad23a6d23d6cc1666c22e4bc215f25f717b569/src/internal/goarch/goarch_wasm.go ) -func (u *unwinder) initAt(pc0, sp0, lr0 ptr, gp gptr, flags unwindFlags) { - if pc0 == ptr(^uint64(0)) && sp0 == ptr(^uint64(0)) { +func (u *unwinder) initAt(pc0, sp0, lr0 ptr64, gp gptr, flags unwindFlags) { + if pc0 == ptr64(^uint64(0)) && sp0 == ptr64(^uint64(0)) { panic("should have been initialized") } @@ -167,7 +167,7 @@ func (u *unwinder) initAt(pc0, sp0, lr0 ptr, gp gptr, flags unwindFlags) { // If the PC is zero, it's likely a nil function call. // Start in the caller's frame. if frame.pc == 0 { - frame.pc = deref[ptr](u.mem, frame.sp) + frame.pc = deref[ptr64](u.mem, frame.sp) frame.sp += goarchPtrSize } @@ -234,7 +234,7 @@ func (u *unwinder) resolveInternal(innermost bool) { // We also defensively check that this won't switch M's on us, // which could happen at critical points in the scheduler. // This ensures gp.m doesn't change from a stack jump. 
- if u.flags&unwindJumpStack != 0 && gp == gMG0(u.mem, gp) && gMCurg(u.mem, gp) != 0 && ptr(gMCurg(u.mem, gp)) == gM(u.mem, gp) { + if u.flags&unwindJumpStack != 0 && gp == gMG0(u.mem, gp) && gMCurg(u.mem, gp) != 0 && ptr64(gMCurg(u.mem, gp)) == gM(u.mem, gp) { switch f.FuncID { case goruntime.FuncID_morestack: // morestack does not return normally -- newstack() @@ -259,7 +259,7 @@ func (u *unwinder) resolveInternal(innermost bool) { flag &^= goruntime.FuncFlagSPWrite } } - frame.fp = frame.sp + ptr(funcspdelta(f, frame.pc)) + frame.fp = frame.sp + ptr64(funcspdelta(f, frame.pc)) frame.fp += goarchPtrSize } @@ -291,10 +291,10 @@ func (u *unwinder) resolveInternal(innermost bool) { } } } else { - var lrPtr ptr + var lrPtr ptr64 if frame.lr == 0 { lrPtr = frame.fp - goarchPtrSize - frame.lr = deref[ptr](u.mem, lrPtr) + frame.lr = deref[ptr64](u.mem, lrPtr) } } @@ -350,13 +350,13 @@ func (u *unwinder) finishInternal() { u.frame.pc = 0 } -func funcspdelta(f funcInfo, targetpc ptr) int32 { +func funcspdelta(f funcInfo, targetpc ptr64) int32 { x, _ := pcvalue(f, f.Pcsp, targetpc) return x } // Returns the PCData value, and the PC where this value starts. -func pcvalue(f funcInfo, off uint32, targetpc ptr) (int32, ptr) { +func pcvalue(f funcInfo, off uint32, targetpc ptr64) (int32, ptr64) { if off == 0 { return -1, 0 } @@ -402,7 +402,7 @@ func pcvalue(f funcInfo, off uint32, targetpc ptr) (int32, ptr) { } // step advances to the next pc, value pair in the encoded table. -func step(p []byte, pc *ptr, val *int32, first bool) (newp []byte, ok bool) { +func step(p []byte, pc *ptr64, val *int32, first bool) (newp []byte, ok bool) { // For both uvdelta and pcdelta, the common case (~70%) // is that they are a single byte. If so, avoid calling readvarint. 
uvdelta := uint32(p[0]) @@ -422,7 +422,7 @@ func step(p []byte, pc *ptr, val *int32, first bool) (newp []byte, ok bool) { n, pcdelta = readvarint(p) } p = p[n:] - *pc += ptr(pcdelta * sysPCQuantum) + *pc += ptr64(pcdelta * sysPCQuantum) return p, true } @@ -454,7 +454,7 @@ type inlineUnwinder struct { symbols *pclntab mem vmem f funcInfo - inlTree ptr // Address of the array of inlinedCall entries + inlTree ptr64 // Address of the array of inlinedCall entries } // next returns the frame representing uf's logical caller. @@ -464,7 +464,7 @@ func (u *inlineUnwinder) next(uf inlineFrame) inlineFrame { return uf } c := derefArrayIndex[inlinedCall](u.mem, u.inlTree, uf.index) - return u.resolveInternal(u.f.entry() + ptr(c.parentPc)) + return u.resolveInternal(u.f.entry() + ptr64(c.parentPc)) } // srcFunc returns the srcFunc representing the given frame. @@ -481,7 +481,7 @@ func (u *inlineUnwinder) srcFunc(uf inlineFrame) srcFunc { } } -func (u *inlineUnwinder) resolveInternal(pc ptr) inlineFrame { +func (u *inlineUnwinder) resolveInternal(pc ptr64) inlineFrame { return inlineFrame{ pc: pc, // Conveniently, this returns -1 if there's an error, which is the same @@ -495,7 +495,7 @@ type inlineFrame struct { // pc is the PC giving the file/line metadata of the current frame. This is // always a "call PC" (not a "return PC"). This is 0 when the iterator is // exhausted. - pc ptr + pc ptr64 // index is the index of the current record in inlTree, or -1 if we are in // the outermost function. 
@@ -506,7 +506,7 @@ func (uf inlineFrame) valid() bool { return uf.pc != 0 } -func newInlineUnwinder(symbols *pclntab, mem vmem, f funcInfo, pc ptr) (inlineUnwinder, inlineFrame) { +func newInlineUnwinder(symbols *pclntab, mem vmem, f funcInfo, pc ptr64) (inlineUnwinder, inlineFrame) { inldataAddr := funcdata(symbols, f, goruntime.FUNCDATA_InlTree) if inldataAddr == 0 { return inlineUnwinder{symbols: symbols, mem: mem, f: f}, inlineFrame{pc: pc, index: -1} @@ -517,7 +517,7 @@ func newInlineUnwinder(symbols *pclntab, mem vmem, f funcInfo, pc ptr) (inlineUn // funcdata returns a pointer to the ith funcdata for f. // funcdata should be kept in sync with cmd/link:writeFuncs. -func funcdata(symbols *pclntab, f funcInfo, i uint8) ptr { +func funcdata(symbols *pclntab, f funcInfo, i uint8) ptr64 { if i >= f.Nfuncdata { return 0 } @@ -526,11 +526,11 @@ func funcdata(symbols *pclntab, f funcInfo, i uint8) ptr { // Return off == ^uint32(0) ? 0 : f.datap.gofunc + uintptr(off), but without branches. // The compiler calculates mask on most architectures using conditional assignment. 
- var mask ptr + var mask ptr64 if off == ^uint32(0) { mask = 1 } mask-- - raw := base + ptr(off) + raw := base + ptr64(off) return raw & mask } diff --git a/wzprof.go b/wzprof.go index 9abced4..46a3b7b 100644 --- a/wzprof.go +++ b/wzprof.go @@ -22,6 +22,7 @@ import ( type Profiling struct { wasm []byte + onlyFunctions map[string]struct{} filteredFunctions map[string]struct{} symbols symbolizer stackIterator func(mod api.Module, def api.FunctionDefinition, wasmsi experimental.StackIterator) experimental.StackIterator @@ -73,6 +74,13 @@ func ProfilingFor(wasm []byte) *Profiling { } } + // FIXME: detect python here + r.onlyFunctions = map[string]struct{}{ + // TODO: find the right functions to hook into + // "_PyEval_EvalFrameDefault": {}, + "_PyEvalFramePushAndInit": {}, + } + return r } @@ -109,14 +117,30 @@ func (p *Profiling) Prepare(mod wazero.CompiledModule) error { sp0 := uint32(imod.Global(0).Get()) gp0 := imod.Global(2).Get() pc0 := si.symbols.FIDToPC(fid(def.Index())) - si.initAt(ptr(pc0), ptr(sp0), 0, gptr(gp0), 0) + si.initAt(ptr64(pc0), ptr64(sp0), 0, gptr(gp0), 0) si.first = true return si } - } else { - p.symbols, _ = buildDwarfSymbolizer(mod) // TODO: surface error as warning? + return nil + } + + dwarf, err := newDwarfparser(mod) + if err != nil { + return nil // TODO: surface error as warning? 
} + if guessPython(dwarf) { + py, err := preparePython(dwarf) + if err != nil { + return err + } + p.symbols = py + p.stackIterator = py.Stackiter + return nil + } + + p.symbols = buildDwarfSymbolizer(dwarf) + return nil } From 671fe2e3e4e61e53a09db5dd8dea7d075a4e9bd5 Mon Sep 17 00:00:00 2001 From: Thomas Pelletier Date: Fri, 7 Jul 2023 16:18:19 -0400 Subject: [PATCH 02/15] Fix off-by one parsing of signed varint --- python.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python.go b/python.go index e844cdd..7270e87 100644 --- a/python.go +++ b/python.go @@ -362,7 +362,7 @@ func pysvarint(m vmem, p ptr32) int32 { x := int32(val >> 1) if val&1 > 0 { - x = ^x + x = -x } return x } From 24b06e110b0fb5a4428bd5e524eb3b9f980ba13a Mon Sep 17 00:00:00 2001 From: Thomas Pelletier Date: Mon, 10 Jul 2023 12:18:54 -0400 Subject: [PATCH 03/15] Instrument vectorcall only --- wzprof.go | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/wzprof.go b/wzprof.go index 46a3b7b..2ee2a72 100644 --- a/wzprof.go +++ b/wzprof.go @@ -77,8 +77,9 @@ func ProfilingFor(wasm []byte) *Profiling { // FIXME: detect python here r.onlyFunctions = map[string]struct{}{ // TODO: find the right functions to hook into - // "_PyEval_EvalFrameDefault": {}, - "_PyEvalFramePushAndInit": {}, + // "_PyEval_EvalFrameDefault": {}, + // "_PyEvalFramePushAndInit": {}, + "PyObject_Vectorcall": {}, } return r From a11ead6807fa2b5ec59a67bc1f014417458b76b6 Mon Sep 17 00:00:00 2001 From: Thomas Pelletier Date: Mon, 10 Jul 2023 12:19:08 -0400 Subject: [PATCH 04/15] Prefix function names with module --- python.go | 33 +++++++++++++++++++++++++++------ 1 file changed, 27 insertions(+), 6 deletions(-) diff --git a/python.go b/python.go index 7270e87..8ebceda 100644 --- a/python.go +++ b/python.go @@ -4,6 +4,8 @@ import ( "debug/dwarf" "encoding/binary" "fmt" + "path/filepath" + "strings" "unsafe" "github.com/tetratelabs/wazero/api" @@ -184,14 +186,33 @@ func (p *pystackiter) 
ProgramCounter() experimental.ProgramCounter { func (p *pystackiter) Function() experimental.InternalFunction { codep := deref[ptr32](p.mem, p.framep+padCodeInFrame) + line, _ := lineForFrame(p.mem, p.framep, codep) + file := derefPyUnicodeUtf8(p.mem, codep+padFilenameInCodeObject) + name := derefPyUnicodeUtf8(p.mem, codep+padNameInCodeObject) return pyfuncall{ - file: derefPyUnicodeUtf8(p.mem, codep+padFilenameInCodeObject), - name: derefPyUnicodeUtf8(p.mem, codep+padNameInCodeObject), + file: file, + name: functionName(file, name), addr: deref[uint32](p.mem, p.framep+padPrevInstrInFrame), - line: lineForFrame(p.mem, p.framep, codep), + line: line, } } +func functionName(path, function string) string { + mod := "" + const frozenPrefix = "" { + return mod + } + return mod + "." + function +} + func (p *pystackiter) Parameters() []uint64 { panic("TODO parameters()") } @@ -281,13 +302,13 @@ func derefPyUnicodeUtf8(m vmem, p ptr32) string { return pyUnicodeUTf8(m, x) } -func lineForFrame(m vmem, framep, codep ptr32) int32 { +func lineForFrame(m vmem, framep, codep ptr32) (int32, bool) { codestart := codep + padCodeAdaptiveInCodeObject previnstr := deref[ptr32](m, framep+padPrevInstrInFrame) firstlineno := deref[int32](m, codep+padFirstlinenoInCodeObject) if previnstr < codestart { - return firstlineno + return firstlineno, false } linearray := deref[ptr32](m, codep+padLinearrayInCodeObject) @@ -343,7 +364,7 @@ func lineForFrame(m vmem, framep, codep ptr32) int32 { } } - return ar_line + return ar_line, true } // Python-specific implementation of protobuf signed varints. 
However From 5d7fe27e2557d6bfeba3d0cfa4522ad8d60f0fd9 Mon Sep 17 00:00:00 2001 From: Thomas Pelletier Date: Mon, 10 Jul 2023 14:55:58 -0400 Subject: [PATCH 05/15] Python version check --- dwarf.go | 33 ++++++++++++--- python.go | 117 ++++++++++++++++++++++++++++++++--------------------- wasmbin.go | 22 +++++----- wzprof.go | 40 ++++++++---------- 4 files changed, 129 insertions(+), 83 deletions(-) diff --git a/dwarf.go b/dwarf.go index 8f0a016..c8f416d 100644 --- a/dwarf.go +++ b/dwarf.go @@ -41,6 +41,14 @@ type dwarfmapper struct { onceSourceOffsetNotFound sync.Once } +const ( + debugInfo = ".debug_info" + debugLine = ".debug_line" + debugStr = ".debug_str" + debugAbbrev = ".debug_abbrev" + debugRanges = ".debug_ranges" +) + func newDwarfparser(module wazero.CompiledModule) (dwarfparser, error) { sections := module.CustomSections() @@ -48,15 +56,15 @@ func newDwarfparser(module wazero.CompiledModule) (dwarfparser, error) { for _, section := range sections { log.Printf("dwarf: found section %s", section.Name()) switch section.Name() { - case ".debug_info": + case debugInfo: info = section.Data() - case ".debug_line": + case debugLine: line = section.Data() - case ".debug_str": + case debugStr: str = section.Data() - case ".debug_abbrev": + case debugAbbrev: abbrev = section.Data() - case ".debug_ranges": + case debugRanges: ranges = section.Data() } } @@ -67,7 +75,22 @@ func newDwarfparser(module wazero.CompiledModule) (dwarfparser, error) { } r := d.Reader() + return dwarfparser{d: d, r: r}, nil +} + +func newDwarfParserFromBin(wasmbin []byte) (dwarfparser, error) { + info := wasmCustomSection(wasmbin, debugInfo) + line := wasmCustomSection(wasmbin, debugLine) + ranges := wasmCustomSection(wasmbin, debugRanges) + str := wasmCustomSection(wasmbin, debugStr) + abbrev := wasmCustomSection(wasmbin, debugAbbrev) + d, err := dwarf.New(abbrev, nil, nil, info, line, nil, ranges, str) + if err != nil { + return dwarfparser{}, fmt.Errorf("dwarf: %w", err) + } + + r := 
d.Reader() return dwarfparser{d: d, r: r}, nil } diff --git a/python.go b/python.go index 8ebceda..56f75e9 100644 --- a/python.go +++ b/python.go @@ -8,55 +8,71 @@ import ( "strings" "unsafe" + "github.com/tetratelabs/wazero" "github.com/tetratelabs/wazero/api" "github.com/tetratelabs/wazero/experimental" ) -// Heuristic to guess whether the wasm binary is actually CPython, based on its -// DWARF information. -// -// It loops over compile units to find one named "Programs/python.c". It should -// be fast since it's the first compile unit when we build CPython. -func guessPython(p dwarfparser) bool { +const ( + runtimeAddrName = "_PyRuntime" + versionAddrName = "Py_Version" +) + +func supportedPython(wasmbin []byte) bool { + p, err := newDwarfParserFromBin(wasmbin) + if err != nil { + return false + } + + versionAddr := pythonAddress(p, versionAddrName) + if versionAddr == 0 { + return false + } + + data := wasmdataSection(wasmbin) + if data == nil { + return false + } + + var versionhex uint32 + d := newDataIterator(data) for { - ent, err := p.r.Next() - if err != nil || ent == nil { + vaddr, seg := d.Next() + if seg == nil || vaddr > int64(versionAddr) { break } - if ent.Tag != dwarf.TagCompileUnit { - p.r.SkipChildren() + + end := vaddr + int64(len(seg)) + if int64(versionAddr)+4 >= end { continue } - name, _ := ent.Val(dwarf.AttrName).(string) - if name == "Programs/python.c" { - return true - } - p.r.SkipChildren() - } - return false -} -type python struct { - dwarf dwarfparser - pyrtaddr ptr32 + offset := int64(versionAddr) - vaddr + versionhex = binary.LittleEndian.Uint32(seg[offset:]) + break + } - counter uint64 + // see cpython patchlevel.h + major := (versionhex >> 24) & 0xFF + minor := (versionhex >> 16) & 0xFF + return major == 3 && minor == 11 } -func preparePython(dwarf dwarfparser) (*python, error) { - pyrtaddr := findPyRuntime(dwarf) - if pyrtaddr == 0 { - return nil, fmt.Errorf("could not find _PyRuntime address") +func preparePython(mod 
wazero.CompiledModule) (*python, error) { + p, err := newDwarfparser(mod) + if err != nil { + return nil, fmt.Errorf("could not build dwarf parser: %w", err) + } + runtimeAddr := pythonAddress(p, runtimeAddrName) + if runtimeAddr == 0 { + return nil, fmt.Errorf("could not find python runtime address") } return &python{ - dwarf: dwarf, - pyrtaddr: ptr32(pyrtaddr), + pyrtaddr: ptr32(runtimeAddr), }, nil } -// Find the address of the _PyRuntime symbol from the dwarf information. -// Returns 0 if not found. -func findPyRuntime(p dwarfparser) uint32 { +func pythonAddress(p dwarfparser, name string) uint32 { for { ent, err := p.r.Next() if err != nil || ent == nil { @@ -65,27 +81,36 @@ func findPyRuntime(p dwarfparser) uint32 { if ent.Tag != dwarf.TagVariable { continue } - name, _ := ent.Val(dwarf.AttrName).(string) - if name != "_PyRuntime" { + n, _ := ent.Val(dwarf.AttrName).(string) + if n != name { continue } - f := ent.AttrField(dwarf.AttrLocation) - if f == nil { - panic("_PyRuntime does not have a location") - } - if f.Class != dwarf.ClassExprLoc { - panic(fmt.Errorf("invalid location class: %s", f.Class)) - } - const DW_OP_addr = 0x3 - loc := f.Val.([]byte) - if len(loc) == 0 || loc[0] != DW_OP_addr { - panic(fmt.Errorf("unexpected address format: %X", loc)) - } - return binary.LittleEndian.Uint32(loc[1:]) + return getDwarfLocationAddress(ent) } return 0 } +type python struct { + pyrtaddr ptr32 + counter uint64 +} + +func getDwarfLocationAddress(ent *dwarf.Entry) uint32 { + f := ent.AttrField(dwarf.AttrLocation) + if f == nil { + return 0 + } + if f.Class != dwarf.ClassExprLoc { + panic(fmt.Errorf("invalid location class: %s", f.Class)) + } + const DW_OP_addr = 0x3 + loc := f.Val.([]byte) + if len(loc) == 0 || loc[0] != DW_OP_addr { + panic(fmt.Errorf("unexpected address format: %X", loc)) + } + return binary.LittleEndian.Uint32(loc[1:]) +} + // Padding of fields in various CPython structs. 
They are calculated // by writing a function in any CPython module, and executing it with // wazero. diff --git a/wasmbin.go b/wasmbin.go index fe0ec27..ec7652d 100644 --- a/wasmbin.go +++ b/wasmbin.go @@ -8,9 +8,14 @@ import ( // Returns true if the wasm module binary b contains a custom section with this // name. func wasmHasCustomSection(b []byte, name string) bool { + return wasmCustomSection(b, name) != nil +} + +// Returns the byte content of a custom section with name, or nil. +func wasmCustomSection(b []byte, name string) []byte { const customSectionId = 0 if len(b) < 8 { - return false + return nil } b = b[8:] // skip magic+version for len(b) > 2 { @@ -24,14 +29,14 @@ func wasmHasCustomSection(b []byte, name string) bool { b = b[n:] m := string(b[:nameLen]) if m == name { - return true + return b[nameLen : length-uint64(n)] } b = b[length-uint64(n):] } else { b = b[length:] } } - return false + return nil } // The functions in this file inspect the contents of a well-formed wasm-binary. @@ -65,8 +70,7 @@ type dataIterator struct { b []byte // remaining bytes in the Data section n uint64 // number of segments - // offset of b in the Data section. - offset int + offset int // offset of b in the Data section. } // newDataIterator prepares an iterator using the bytes of a well-formed data @@ -132,9 +136,9 @@ func (d *dataIterator) uvarint() uint64 { return x } -// Next returns the bytes of the following segment, and its offset in virtual +// Next returns the bytes of the following segment, and its address in virtual // memory, or a nil slice if there are no more segment. 
-func (d *dataIterator) Next() (offset int64, seg []byte) { +func (d *dataIterator) Next() (vaddr int64, seg []byte) { if d.n == 0 { return 0, nil } @@ -158,7 +162,7 @@ func (d *dataIterator) Next() (offset int64, seg []byte) { panic(fmt.Errorf("expected constant i32.const (0x41); got %#x", v)) } - offset = d.varint() + vaddr = d.varint() v = d.byte() if v != 0x0B { @@ -169,7 +173,7 @@ func (d *dataIterator) Next() (offset int64, seg []byte) { seg = d.read(int(length)) d.n-- - return offset, seg + return vaddr, seg } // SkipToDataOffset iterates over segments to return the bytes at a given data diff --git a/wzprof.go b/wzprof.go index 2ee2a72..c5fb3d0 100644 --- a/wzprof.go +++ b/wzprof.go @@ -27,7 +27,8 @@ type Profiling struct { symbols symbolizer stackIterator func(mod api.Module, def api.FunctionDefinition, wasmsi experimental.StackIterator) experimental.StackIterator - isGo bool + isGo bool + isPython bool } // ProfilingFor a given wasm binary. The resulting Profiling needs to be @@ -72,14 +73,13 @@ func ProfilingFor(wasm []byte) *Profiling { "memcmp": {}, "memchr": {}, } - } - - // FIXME: detect python here - r.onlyFunctions = map[string]struct{}{ - // TODO: find the right functions to hook into - // "_PyEval_EvalFrameDefault": {}, - // "_PyEvalFramePushAndInit": {}, - "PyObject_Vectorcall": {}, + } else if supportedPython(wasm) { + r.isPython = true + r.onlyFunctions = map[string]struct{}{ + // "_PyEval_EvalFrameDefault": {}, + // "_PyEvalFramePushAndInit": {}, + "PyObject_Vectorcall": {}, + } } return r @@ -122,26 +122,20 @@ func (p *Profiling) Prepare(mod wazero.CompiledModule) error { si.first = true return si } - return nil - } - - dwarf, err := newDwarfparser(mod) - if err != nil { - return nil // TODO: surface error as warning? 
- } - - if guessPython(dwarf) { - py, err := preparePython(dwarf) + } else if p.isPython { + py, err := preparePython(mod) if err != nil { return err } p.symbols = py p.stackIterator = py.Stackiter - return nil + } else { + dwarf, err := newDwarfparser(mod) + if err != nil { + return nil // TODO: surface error as warning? + } + p.symbols = buildDwarfSymbolizer(dwarf) } - - p.symbols = buildDwarfSymbolizer(dwarf) - return nil } From df3a49400c3a55605eda227cdfc908d460e8ddce Mon Sep 17 00:00:00 2001 From: Thomas Pelletier Date: Mon, 10 Jul 2023 14:59:39 -0400 Subject: [PATCH 06/15] Fix lint --- cpu.go | 4 ---- python.go | 16 ++++++++-------- 2 files changed, 8 insertions(+), 12 deletions(-) diff --git a/cpu.go b/cpu.go index 2bdeb3a..743a311 100644 --- a/cpu.go +++ b/cpu.go @@ -204,10 +204,7 @@ func (p *CPUProfiler) NewFunctionListener(def api.FunctionDefinition) experiment type cpuProfiler struct{ *CPUProfiler } -var mydepth = 0 - func (p cpuProfiler) Before(ctx context.Context, mod api.Module, def api.FunctionDefinition, _ []uint64, si experimental.StackIterator) { - mydepth++ var frame cpuTimeFrame p.mutex.Lock() @@ -232,7 +229,6 @@ func (p cpuProfiler) Before(ctx context.Context, mod api.Module, def api.Functio } func (p cpuProfiler) After(ctx context.Context, mod api.Module, def api.FunctionDefinition, _ []uint64) { - mydepth-- i := len(p.frames) - 1 f := p.frames[i] p.frames = p.frames[:i] diff --git a/python.go b/python.go index 56f75e9..223ffba 100644 --- a/python.go +++ b/python.go @@ -118,18 +118,18 @@ func getDwarfLocationAddress(ent *dwarf.Entry) uint32 { // TODO: look into using CGO and #import to generate them // instead. const ( - // _PyRuntimeState + // _PyRuntimeState. padTstateCurrentInRT = 360 - // PyThreadState + // PyThreadState. padCframeInThreadState = 40 - // _PyCFrame + // _PyCFrame. padCurrentFrameInCFrame = 4 - // _PyInterpreterFrame + // _PyInterpreterFrame. 
padPreviousInFrame = 24 padCodeInFrame = 16 padPrevInstrInFrame = 28 padOwnerInFrame = 37 - // PyCodeObject + // PyCodeObject. padFilenameInCodeObject = 80 padNameInCodeObject = 84 padCodeAdaptiveInCodeObject = 116 @@ -138,14 +138,14 @@ const ( padLinetableInCodeObject = 92 padFirstTraceableInCodeObject = 108 sizeCodeUnit = 2 - // PyASCIIObject + // PyASCIIObject. padStateInAsciiObject = 16 padLengthInAsciiObject = 8 sizeAsciiObject = 24 - // PyBytesObject + // PyBytesObject. padSvalInBytesObject = 16 padSizeInBytesObject = 8 - // Enum constants + // Enum constants. enumCodeLocation1 = 11 enumCodeLocation2 = 12 enumCodeLocationNoCol = 13 From 53d83612f100ccc5549289c1a35e885627326620 Mon Sep 17 00:00:00 2001 From: Thomas Pelletier Date: Mon, 10 Jul 2023 15:14:27 -0400 Subject: [PATCH 07/15] Filter python memory allocation functions --- mem.go | 33 ++++++++++++++++++++++++++++++++- 1 file changed, 32 insertions(+), 1 deletion(-) diff --git a/mem.go b/mem.go index 3e6d4c0..1f18372 100644 --- a/mem.go +++ b/mem.go @@ -178,7 +178,38 @@ func (p *MemoryProfiler) NewHandler(sampleRate float64) http.Handler { // compilers and libraries. It uses the function name to detect memory // allocators, currently supporting libc, Go, and TinyGo. 
func (p *MemoryProfiler) NewFunctionListener(def api.FunctionDefinition) experimental.FunctionListener { - return nil // TODO + if p.p.isPython { + switch def.Name() { + // Raw domain + case "PyMem_RawMalloc": + return profilingListener{p.p, &mallocProfiler{memory: p}} + case "PyMem_RawCalloc": + return profilingListener{p.p, &callocProfiler{memory: p}} + case "PyMem_RawRealloc": + return profilingListener{p.p, &reallocProfiler{memory: p}} + case "PyMem_RawFree": + return profilingListener{p.p, &freeProfiler{memory: p}} + // Memory domain + case "PyMem_Malloc": + return profilingListener{p.p, &mallocProfiler{memory: p}} + case "PyMem_Calloc": + return profilingListener{p.p, &callocProfiler{memory: p}} + case "PyMem_Realloc": + return profilingListener{p.p, &reallocProfiler{memory: p}} + case "PyMem_Free": + return profilingListener{p.p, &freeProfiler{memory: p}} + // Object domain + case "PyObject_Malloc": + return profilingListener{p.p, &mallocProfiler{memory: p}} + case "PyObject_Calloc": + return profilingListener{p.p, &callocProfiler{memory: p}} + case "PyObject_Realloc": + return profilingListener{p.p, &reallocProfiler{memory: p}} + case "PyObject_Free": + return profilingListener{p.p, &freeProfiler{memory: p}} + } + return nil + } switch def.Name() { // C standard library, Rust case "malloc": From a8ea622a5774a9cfbc9b181af7b97a068fd1244d Mon Sep 17 00:00:00 2001 From: Thomas Pelletier Date: Tue, 11 Jul 2023 12:55:11 -0400 Subject: [PATCH 08/15] Provide unique program counter --- python.go | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/python.go b/python.go index 223ffba..0cfeb40 100644 --- a/python.go +++ b/python.go @@ -92,7 +92,6 @@ func pythonAddress(p dwarfparser, name string) uint32 { type python struct { pyrtaddr ptr32 - counter uint64 } func getDwarfLocationAddress(ent *dwarf.Entry) uint32 { @@ -159,8 +158,10 @@ func (p *python) Locations(fn experimental.InternalFunction, pc experimental.Pro loc := location{ File: 
call.file, Line: int64(call.line), + Column: 0, // TODO + Inlined: false, HumanName: call.name, - StableName: call.name, + StableName: call.file + "." + call.name, } return uint64(call.addr), []location{loc} @@ -174,7 +175,6 @@ func (p *python) Stackiter(mod api.Module, def api.FunctionDefinition, wasmsi ex return &pystackiter{ namedbg: def.DebugName(), - counter: &p.counter, mem: m, framep: framep, } @@ -182,7 +182,6 @@ func (p *python) Stackiter(mod api.Module, def api.FunctionDefinition, wasmsi ex type pystackiter struct { namedbg string - counter *uint64 mem api.Memory started bool framep ptr32 // _PyInterpreterFrame* @@ -205,8 +204,7 @@ func (p *pystackiter) Next() bool { } func (p *pystackiter) ProgramCounter() experimental.ProgramCounter { - *p.counter += 1 - return experimental.ProgramCounter(*p.counter) + return experimental.ProgramCounter(deref[uint32](p.mem, p.framep+padPrevInstrInFrame)) } func (p *pystackiter) Function() experimental.InternalFunction { From bcbc100195f9406ead6228803665956b5cfba9c6 Mon Sep 17 00:00:00 2001 From: Thomas Pelletier Date: Tue, 11 Jul 2023 12:55:45 -0400 Subject: [PATCH 09/15] Clean up names of __init__ functions --- python.go | 3 +++ 1 file changed, 3 insertions(+) diff --git a/python.go b/python.go index 0cfeb40..007fff2 100644 --- a/python.go +++ b/python.go @@ -226,6 +226,9 @@ func functionName(path, function string) string { if strings.HasPrefix(path, frozenPrefix) { mod = path[len(frozenPrefix) : len(path)-1] } else { + if strings.HasSuffix(path, "__init__.py") { + path = filepath.Dir(path) + } file := filepath.Base(path) mod = file[:len(file)-len(filepath.Ext(file))] } From aa23129f67befdc49edaa48f4ff8a0fbda0e8f3f Mon Sep 17 00:00:00 2001 From: Thomas Pelletier Date: Tue, 11 Jul 2023 16:34:07 -0400 Subject: [PATCH 10/15] Add tests for python Signed-off-by: Thomas Pelletier --- .gitignore | 1 + Makefile | 16 +++++++-- cmd/wzprof/main_test.go | 75 +++++++++++++++++++++++++++++---------- testdata/python/simple.py | 15 
++++++++ 4 files changed, 86 insertions(+), 21 deletions(-) create mode 100644 testdata/python/simple.py diff --git a/.gitignore b/.gitignore index 1d12f19..85fd53d 100644 --- a/.gitignore +++ b/.gitignore @@ -21,3 +21,4 @@ testdata/rust/*/target/* testdata/.sysroot/* *.pb.gz +.python diff --git a/Makefile b/Makefile index 9d6a40f..fbd1653 100644 --- a/Makefile +++ b/Makefile @@ -18,15 +18,17 @@ testdata.files = \ $(testdata.tinygo.wasm) \ $(testdata.wat.wasm) +python.files = .python/python.wasm .python/python311.zip + all: test clean: - rm -f $(testdata.files) + rm -f $(testdata.files) $(python.files) test: testdata go test ./... -testdata: wasi-libc $(testdata.files) +testdata: wasi-libc python $(testdata.files) testdata/.sysroot: mkdir -p testdata/.sysroot @@ -54,6 +56,16 @@ testdata/wat/%.wasm: testdata/wat/%.wat wasi-libc: testdata/.sysroot/lib/wasm32-wasi/libc.a +python: $(python.files) + +.python/python.wasm: + mkdir -p $(dir $@) + curl -fsSL https://timecraft.s3.amazonaws.com/python-vanilla/main/python.wasm -o $@ + +.python/python311.zip: + mkdir -p $(dir $@) + curl -fsSL https://timecraft.s3.amazonaws.com/python-vanilla/main/python311.zip -o $@ + .gitmodules: git submodule add --name wasi-libc -- \ 'https://github.com/WebAssembly/wasi-libc' testdata/.wasi-libc diff --git a/cmd/wzprof/main_test.go b/cmd/wzprof/main_test.go index 44de3e2..bc54525 100644 --- a/cmd/wzprof/main_test.go +++ b/cmd/wzprof/main_test.go @@ -16,7 +16,8 @@ import ( // that. 
func TestDataCSimple(t *testing.T) { - testMemoryProfiler(t, "../../testdata/c/simple.wasm", []sample{ + p := program{filePath: "../../testdata/c/simple.wasm"} + testMemoryProfiler(t, p, []sample{ { []int64{1, 10}, []frame{ @@ -53,7 +54,8 @@ func TestDataCSimple(t *testing.T) { } func TestDataRustSimple(t *testing.T) { - testMemoryProfiler(t, "../../testdata/rust/simple/target/wasm32-wasi/debug/simple.wasm", []sample{ + p := program{filePath: "../../testdata/rust/simple/target/wasm32-wasi/debug/simple.wasm"} + testMemoryProfiler(t, p, []sample{ { []int64{1, 120}, []frame{ @@ -89,10 +91,51 @@ func TestDataRustSimple(t *testing.T) { }) } +func TestPyTwoCalls(t *testing.T) { + pyd := t.TempDir() + pyzip := filepath.Join(pyd, "/usr/local/lib/python311.zip") + pyscript := filepath.Join(pyd, "script.py") + os.MkdirAll(filepath.Dir(pyzip), os.ModePerm) + os.Link("../../.python/python311.zip", pyzip) + os.Link("../../testdata/python/simple.py", pyscript) + + p := program{ + filePath: "../../.python/python.wasm", + args: []string{"/script.py"}, + mounts: []string{pyd + ":/"}, + } + + testCpuProfiler(t, p, []sample{ + { // deepest script.py call stack + []int64{1}, + []frame{ + {"script.a", 2, false}, + {"script.b", 7, false}, + {"script.c", 11, false}, + {"script", 15, false}, + }, + }, + }) + + testMemoryProfiler(t, p, []sample{ + // byterray(100) allocates 28 bytes for the object, and 100+1 byte for + // the content because in python byte arrays are null-terminated. + { + []int64{2, 129}, + []frame{ + {"script.a", 3, false}, + {"script.b", 7, false}, + {"script.c", 11, false}, + {"script", 15, false}, + }, + }, + }) +} + func TestGoTwoCalls(t *testing.T) { - wasm := "../../testdata/go/twocalls.wasm" + p := program{filePath: "../../testdata/go/twocalls.wasm"} - testCpuProfiler(t, wasm, []sample{ + testCpuProfiler(t, p, []sample{ { // first call to myalloc1() from main. 
[]int64{1}, []frame{ @@ -129,7 +172,7 @@ func TestGoTwoCalls(t *testing.T) { }, }) - testMemoryProfiler(t, wasm, []sample{ + testMemoryProfiler(t, p, []sample{ { // first call to myalloc1() from main. []int64{1, 41}, []frame{ @@ -167,35 +210,29 @@ func TestGoTwoCalls(t *testing.T) { }) } -func testCpuProfiler(t *testing.T, path string, expectedSamples []sample) { - prog := &program{ - filePath: path, - sampleRate: 1, - cpuProfile: filepath.Join(t.TempDir(), "cpu.pprof"), - } +func testCpuProfiler(t *testing.T, prog program, expectedSamples []sample) { + prog.sampleRate = 1 + prog.cpuProfile = filepath.Join(t.TempDir(), "cpu.pprof") expectedTypes := []string{ "samples", "cpu", } - p := execForProfile(t, prog, prog.cpuProfile) + p := execForProfile(t, &prog, prog.cpuProfile) assertSamples(t, expectedTypes, expectedSamples, p) } -func testMemoryProfiler(t *testing.T, path string, expectedSamples []sample) { - prog := &program{ - filePath: path, - sampleRate: 1, - memProfile: filepath.Join(t.TempDir(), "mem.pprof"), - } +func testMemoryProfiler(t *testing.T, prog program, expectedSamples []sample) { + prog.sampleRate = 1 + prog.memProfile = filepath.Join(t.TempDir(), "mem.pprof") expectedTypes := []string{ "alloc_objects", "alloc_space", } - p := execForProfile(t, prog, prog.memProfile) + p := execForProfile(t, &prog, prog.memProfile) assertSamples(t, expectedTypes, expectedSamples, p) } diff --git a/testdata/python/simple.py b/testdata/python/simple.py new file mode 100644 index 0000000..5c61d79 --- /dev/null +++ b/testdata/python/simple.py @@ -0,0 +1,15 @@ +def a(): + print("world") + d = bytearray(100) + print(len(d)) + +def b(): + a() + +def c(): + print("hello") + b() + print("!") + +if __name__ == "__main__": + c() \ No newline at end of file From d926db666c47785ddb641aea2661c9600daacb36 Mon Sep 17 00:00:00 2001 From: Thomas Pelletier Date: Tue, 11 Jul 2023 16:55:00 -0400 Subject: [PATCH 11/15] CI uses makes test --- .github/workflows/build.yml | 2 +- 1 file 
changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index ccc3afd..f5df6a3 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -64,4 +64,4 @@ jobs: check-latest: true - name: Test - run: go test -v ./... + run: make test From 4f5be8f7e1f2356a78e2f823b7287dc816a8168e Mon Sep 17 00:00:00 2001 From: Thomas Pelletier Date: Tue, 11 Jul 2023 17:21:13 -0400 Subject: [PATCH 12/15] Change test command --- .github/workflows/build.yml | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index f5df6a3..686e666 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -63,5 +63,8 @@ jobs: go-version-file: .go-version check-latest: true + - name: Setup python + run: make python + - name: Test - run: make test + run: go test ./... From 3c9dec3b1b3868d0bc0aef2128cd49ef0f7c79ab Mon Sep 17 00:00:00 2001 From: Thomas Pelletier Date: Tue, 11 Jul 2023 17:39:07 -0400 Subject: [PATCH 13/15] Add language support to readme --- README.md | 32 ++++++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) diff --git a/README.md b/README.md index 1406a78..6c9dfb6 100644 --- a/README.md +++ b/README.md @@ -186,6 +186,38 @@ The CPU time profiler measures the actual time spent on-CPU without taking into account the off-CPU time (e.g waiting for I/O). For this profiler, all the host-functions are considered off-CPU. +## Language support + +wzprof runs some heuristics to assess what the guest module is running to adapt +the way it symbolizes and walks the stack. In all other cases, it defaults to +inspecting the wasm stack and uses DWARF information if present in the module. + +### Golang + +If the guest has been compiled by golang/go 1.21+, wzprof inspects the memory +to walk the Go stack, which provides full call stacks, instead of the shortened +versions you would get without this support. 
+ +In addition, wzprof parses pclntab to perform symbolization. This is the same +mechanism the Go runtime itself uses to display meaningful stack traces when a +panic occurs. + +### Python 3.11 + +If the guest is CPython 3.11 and has been compiled with debug symbols (such as +[timecraft's][timecraft-python]), wzprof walks the Python interpreter call +stack, not the C stack it would otherwise report. This provides more meaningful +profiling information on the script being executed. + +At the moment it does not support merging the C extension calls into the Python +interpreter stack. + +Note that a current limitation of the implementation is that unloading or +reloading modules may result in an incorrect profile. If that's a problem for +you please file an issue in the github tracker. + +[timecraft-python]: https://docs.timecraft.dev/getting-started/prep-application/compiling-python#preparing-python + ## Contributing Pull requests are welcome! Anything that is not a simple fix would probably From b957105425b1f0a85fada1db63c1dba7c570f34f Mon Sep 17 00:00:00 2001 From: Thomas Pelletier Date: Tue, 11 Jul 2023 18:15:45 -0400 Subject: [PATCH 14/15] Feedback --- mem.go | 2 +- python.go | 3 --- wzprof.go | 27 +++++++++++++++++++-------- 3 files changed, 20 insertions(+), 12 deletions(-) diff --git a/mem.go b/mem.go index 1f18372..f0101a6 100644 --- a/mem.go +++ b/mem.go @@ -178,7 +178,7 @@ func (p *MemoryProfiler) NewHandler(sampleRate float64) http.Handler { // compilers and libraries. It uses the function name to detect memory // allocators, currently supporting libc, Go, and TinyGo. 
func (p *MemoryProfiler) NewFunctionListener(def api.FunctionDefinition) experimental.FunctionListener { - if p.p.isPython { + if p.p.lang == python311 { switch def.Name() { // Raw domain case "PyMem_RawMalloc": diff --git a/python.go b/python.go index 007fff2..0929d7d 100644 --- a/python.go +++ b/python.go @@ -196,7 +196,6 @@ func (p *pystackiter) Next() bool { oldframe := p.framep p.framep = deref[ptr32](p.mem, p.framep+padPreviousInFrame) if oldframe == p.framep { - fmt.Printf("frame previous field pointer to the same frame: %x", p.framep) p.framep = 0 return false } @@ -339,13 +338,11 @@ func lineForFrame(m vmem, framep, codep ptr32) (int32, bool) { linearray := deref[ptr32](m, codep+padLinearrayInCodeObject) if linearray != 0 { - fmt.Println("LINEARRAY PANIC") panic("can't handle code sections with line arrays") } codebytes := deref[ptr32](m, codep+padLinetableInCodeObject) if codebytes == 0 { - fmt.Println("CODEBYTES PANIC") panic("code section must have a linetable") } diff --git a/wzprof.go b/wzprof.go index c5fb3d0..4261d40 100644 --- a/wzprof.go +++ b/wzprof.go @@ -27,10 +27,17 @@ type Profiling struct { symbols symbolizer stackIterator func(mod api.Module, def api.FunctionDefinition, wasmsi experimental.StackIterator) experimental.StackIterator - isGo bool - isPython bool + lang language } +type language int8 + +const ( + unknown language = iota + golang + python311 +) + // ProfilingFor a given wasm binary. The resulting Profiling needs to be // prepared after Wazero module compilation. func ProfilingFor(wasm []byte) *Profiling { @@ -43,7 +50,7 @@ func ProfilingFor(wasm []byte) *Profiling { } if binCompiledByGo(wasm) { - r.isGo = true + r.lang = golang // Those functions are special. They use a different calling // convention. Their call sites do not update the stack pointer, // which makes it impossible to correctly walk the stack. 
@@ -74,11 +81,14 @@ func ProfilingFor(wasm []byte) *Profiling { "memchr": {}, } } else if supportedPython(wasm) { - r.isPython = true + r.lang = python311 r.onlyFunctions = map[string]struct{}{ + "PyObject_Vectorcall": {}, + // Those functions are also likely candidates for useful profiling. + // We may need to look into them if someone reports missing frames. + // // "_PyEval_EvalFrameDefault": {}, // "_PyEvalFramePushAndInit": {}, - "PyObject_Vectorcall": {}, } } @@ -100,7 +110,8 @@ func (p *Profiling) MemoryProfiler(options ...MemoryProfilerOption) *MemoryProfi // Prepare selects the most appropriate analysis functions for the guest // code in the provided module. func (p *Profiling) Prepare(mod wazero.CompiledModule) error { - if p.isGo { + switch p.lang { + case golang: s, err := preparePclntabSymbolizer(p.wasm, mod) if err != nil { return err @@ -122,14 +133,14 @@ func (p *Profiling) Prepare(mod wazero.CompiledModule) error { si.first = true return si } - } else if p.isPython { + case python311: py, err := preparePython(mod) if err != nil { return err } p.symbols = py p.stackIterator = py.Stackiter - } else { + default: dwarf, err := newDwarfparser(mod) if err != nil { return nil // TODO: surface error as warning? From bb0d5fce62f3e4cb681facec635b94c88d2f6df7 Mon Sep 17 00:00:00 2001 From: Thomas Pelletier Date: Wed, 12 Jul 2023 09:21:43 -0400 Subject: [PATCH 15/15] Explain the 2 calls in test --- cmd/wzprof/main_test.go | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/cmd/wzprof/main_test.go b/cmd/wzprof/main_test.go index bc54525..2a52a49 100644 --- a/cmd/wzprof/main_test.go +++ b/cmd/wzprof/main_test.go @@ -119,7 +119,8 @@ func TestPyTwoCalls(t *testing.T) { testMemoryProfiler(t, p, []sample{ // byterray(100) allocates 28 bytes for the object, and 100+1 byte for - // the content because in python byte arrays are null-terminated. + // the content because in python byte arrays are null-terminated. 
It + // first calls PyObject_Malloc(28), then PyObject_Realloc(101). { []int64{2, 129}, []frame{