From 2870ba854372f8aabe694b42bbb91d8f46111414 Mon Sep 17 00:00:00 2001 From: Li Jie Date: Thu, 2 Jul 2026 03:27:34 +0800 Subject: [PATCH 01/59] build: add Go-style pclntab findfunc index --- internal/build/pclntab.go | 134 +++++++++++++++++++++++++++++++++ internal/build/pclntab_test.go | 83 ++++++++++++++++++++ 2 files changed, 217 insertions(+) create mode 100644 internal/build/pclntab.go create mode 100644 internal/build/pclntab_test.go diff --git a/internal/build/pclntab.go b/internal/build/pclntab.go new file mode 100644 index 0000000000..60c2c97416 --- /dev/null +++ b/internal/build/pclntab.go @@ -0,0 +1,134 @@ +//go:build !llgo +// +build !llgo + +package build + +import ( + "fmt" + + llvm "github.com/xgo-dev/llvm" +) + +const ( + pclnMinFuncSize = uint32(16) + pclnFuncTabBucketSize = uint32(256) * pclnMinFuncSize + pclnFindFuncSubbucket = 16 +) + +type pclnFuncTabEntry struct { + entryOff uint32 + funcOff uint32 +} + +type pclnFindFuncBucket struct { + idx uint32 + subbuckets [pclnFindFuncSubbucket]uint8 +} + +func buildPCLnFindFuncBuckets(ftab []pclnFuncTabEntry, textSize uint32) ([]pclnFindFuncBucket, error) { + if textSize == 0 { + return nil, nil + } + if len(ftab) < 2 { + return nil, fmt.Errorf("pclntab ftab needs at least one function and one sentinel") + } + for i := 1; i < len(ftab); i++ { + if ftab[i].entryOff <= ftab[i-1].entryOff { + return nil, fmt.Errorf("pclntab ftab entries must be strictly increasing") + } + } + if ftab[0].entryOff != 0 { + return nil, fmt.Errorf("pclntab first entry offset must be zero") + } + if ftab[len(ftab)-1].entryOff < textSize { + return nil, fmt.Errorf("pclntab sentinel offset %d below text size %d", ftab[len(ftab)-1].entryOff, textSize) + } + + nbuckets := int((textSize + pclnFuncTabBucketSize - 1) / pclnFuncTabBucketSize) + buckets := make([]pclnFindFuncBucket, nbuckets) + subSize := pclnFuncTabBucketSize / pclnFindFuncSubbucket + for b := range buckets { + bucketStart := uint32(b) * pclnFuncTabBucketSize + 
baseIdx := pclnFuncIndexForPC(ftab, bucketStart) + buckets[b].idx = uint32(baseIdx) + for s := 0; s < pclnFindFuncSubbucket; s++ { + pc := bucketStart + uint32(s)*subSize + if pc >= textSize { + pc = textSize - 1 + } + subIdx := pclnFuncIndexForPC(ftab, pc) + delta := subIdx - baseIdx + if delta < 0 || delta > 255 { + return nil, fmt.Errorf("pclntab subbucket delta overflow: bucket=%d subbucket=%d delta=%d", b, s, delta) + } + buckets[b].subbuckets[s] = uint8(delta) + } + } + return buckets, nil +} + +func pclnFuncIndexForPC(ftab []pclnFuncTabEntry, pcOff uint32) int { + lo, hi := 0, len(ftab)-1 // last entry is the sentinel. + for lo+1 < hi { + mid := int(uint(lo+hi) >> 1) + if ftab[mid].entryOff <= pcOff { + lo = mid + } else { + hi = mid + } + } + for lo+1 < len(ftab) && ftab[lo+1].entryOff <= pcOff { + lo++ + } + if lo >= len(ftab)-1 { + return len(ftab) - 2 + } + return lo +} + +func pclnLookupFuncIndex(ftab []pclnFuncTabEntry, buckets []pclnFindFuncBucket, pcOff uint32) int { + if len(ftab) < 2 || len(buckets) == 0 { + return -1 + } + bucket := pcOff / pclnFuncTabBucketSize + if bucket >= uint32(len(buckets)) { + return -1 + } + subSize := pclnFuncTabBucketSize / pclnFindFuncSubbucket + sub := (pcOff % pclnFuncTabBucketSize) / subSize + b := buckets[bucket] + idx := int(b.idx) + int(b.subbuckets[sub]) + for idx+1 < len(ftab) && ftab[idx+1].entryOff <= pcOff { + idx++ + } + if idx >= len(ftab)-1 { + return len(ftab) - 2 + } + return idx +} + +func emitPCLnFindFuncBuckets(mod llvm.Module, symbol string, buckets []pclnFindFuncBucket) llvm.Value { + ctx := mod.Context() + i8Type := ctx.Int8Type() + i32Type := ctx.Int32Type() + subType := llvm.ArrayType(i8Type, pclnFindFuncSubbucket) + bucketType := ctx.StructType([]llvm.Type{i32Type, subType}, false) + arrayType := llvm.ArrayType(bucketType, len(buckets)) + values := make([]llvm.Value, 0, len(buckets)) + for _, bucket := range buckets { + subs := make([]llvm.Value, 0, len(bucket.subbuckets)) + for _, sub := range 
bucket.subbuckets { + subs = append(subs, llvm.ConstInt(i8Type, uint64(sub), false)) + } + values = append(values, llvm.ConstNamedStruct(bucketType, []llvm.Value{ + llvm.ConstInt(i32Type, uint64(bucket.idx), false), + llvm.ConstArray(i8Type, subs), + })) + } + global := llvm.AddGlobal(mod, arrayType, symbol) + global.SetInitializer(llvm.ConstArray(bucketType, values)) + global.SetGlobalConstant(true) + global.SetUnnamedAddr(true) + global.SetAlignment(4) + return global +} diff --git a/internal/build/pclntab_test.go b/internal/build/pclntab_test.go new file mode 100644 index 0000000000..5eef7ffdfe --- /dev/null +++ b/internal/build/pclntab_test.go @@ -0,0 +1,83 @@ +//go:build !llgo +// +build !llgo + +package build + +import ( + "strings" + "testing" + + llvm "github.com/xgo-dev/llvm" +) + +func TestBuildPCLnFindFuncBucketsLookup(t *testing.T) { + ftab := []pclnFuncTabEntry{ + {entryOff: 0, funcOff: 11}, + {entryOff: 16, funcOff: 22}, + {entryOff: 64, funcOff: 33}, + {entryOff: 4096, funcOff: 44}, + {entryOff: 4352, funcOff: 55}, + {entryOff: 8192, funcOff: 0}, // sentinel + } + buckets, err := buildPCLnFindFuncBuckets(ftab, 8192) + if err != nil { + t.Fatalf("buildPCLnFindFuncBuckets: %v", err) + } + if got, want := len(buckets), 2; got != want { + t.Fatalf("bucket count = %d, want %d", got, want) + } + for _, tt := range []struct { + pc uint32 + want int + }{ + {pc: 0, want: 0}, + {pc: 15, want: 0}, + {pc: 16, want: 1}, + {pc: 63, want: 1}, + {pc: 64, want: 2}, + {pc: 4095, want: 2}, + {pc: 4096, want: 3}, + {pc: 4351, want: 3}, + {pc: 4352, want: 4}, + {pc: 8191, want: 4}, + } { + if got := pclnLookupFuncIndex(ftab, buckets, tt.pc); got != tt.want { + t.Fatalf("lookup(%d) = %d, want %d", tt.pc, got, tt.want) + } + } +} + +func TestBuildPCLnFindFuncBucketsRejectsOverflow(t *testing.T) { + ftab := make([]pclnFuncTabEntry, 0, 302) + for i := 0; i < 301; i++ { + ftab = append(ftab, pclnFuncTabEntry{entryOff: uint32(i), funcOff: uint32(i + 1)}) + } + ftab = 
append(ftab, pclnFuncTabEntry{entryOff: pclnFuncTabBucketSize, funcOff: 0}) + if _, err := buildPCLnFindFuncBuckets(ftab, pclnFuncTabBucketSize); err == nil { + t.Fatal("expected subbucket overflow error") + } +} + +func TestEmitPCLnFindFuncBuckets(t *testing.T) { + llvm.InitializeAllTargets() + ctx := llvm.NewContext() + defer ctx.Dispose() + mod := ctx.NewModule("pclntab-test") + defer mod.Dispose() + + buckets := []pclnFindFuncBucket{ + {idx: 0, subbuckets: [16]uint8{0, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2}}, + {idx: 3, subbuckets: [16]uint8{0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}}, + } + emitPCLnFindFuncBuckets(mod, "__llgo_findfunctab", buckets) + ir := mod.String() + for _, want := range []string{ + `@__llgo_findfunctab = unnamed_addr constant [2 x { i32, [16 x i8] }]`, + `{ i32 0, [16 x i8] c"\00\01\02`, + `{ i32 3, [16 x i8] c"\00\00\01`, + } { + if !strings.Contains(ir, want) { + t.Fatalf("IR missing %q:\n%s", want, ir) + } + } +} From f365b73fd8b42ddc3ffe33bc1a4b7cf0a8a4a1f7 Mon Sep 17 00:00:00 2001 From: Li Jie Date: Sun, 28 Jun 2026 18:56:09 +0800 Subject: [PATCH 02/59] ssa: emit DCE-safe function metadata --- cl/cltest/cltest.go | 12 ++- cl/compile.go | 8 ++ cl/funcinfo_metadata_test.go | 142 +++++++++++++++++++++++++++++++++++ ssa/funcinfo.go | 63 ++++++++++++++++ ssa/package.go | 2 + ssa/ssa_test.go | 98 ++++++++++++++++++++++++ 6 files changed, 323 insertions(+), 2 deletions(-) create mode 100644 cl/funcinfo_metadata_test.go create mode 100644 ssa/funcinfo.go diff --git a/cl/cltest/cltest.go b/cl/cltest/cltest.go index 51f199bf94..d70ab47672 100644 --- a/cl/cltest/cltest.go +++ b/cl/cltest/cltest.go @@ -540,7 +540,7 @@ func filterRunOutput(in []byte) []byte { return out.Bytes() } -func TestCompileEx(t *testing.T, src any, fname, expected string, dbg bool) { +func CompileIREx(t *testing.T, src any, fname string, dbg bool, configure func(llssa.Program)) string { t.Helper() fset := token.NewFileSet() f, err := parser.ParseFile(fset, 
fname, src, parser.ParseComments) @@ -563,13 +563,21 @@ func TestCompileEx(t *testing.T, src any, fname, expected string, dbg bool) { foo.WriteTo(os.Stderr) prog := ssatest.NewProgramEx(t, nil, imp) prog.TypeSizes(types.SizesFor("gc", runtime.GOARCH)) + if configure != nil { + configure(prog) + } ret, err := cl.NewPackage(prog, foo, files) if err != nil { t.Fatal("cl.NewPackage failed:", err) } + return ret.String() +} - if v := ret.String(); llssa.StripModuleTarget(v) != expected && expected != ";" { // expected == ";" means skipping out.ll +func TestCompileEx(t *testing.T, src any, fname, expected string, dbg bool) { + t.Helper() + v := CompileIREx(t, src, fname, dbg, nil) + if llssa.StripModuleTarget(v) != expected && expected != ";" { // expected == ";" means skipping out.ll t.Fatalf("\n==> got:\n%s\n==> expected:\n%s\n", v, expected) } } diff --git a/cl/compile.go b/cl/compile.go index 434a25d773..4de05558d0 100644 --- a/cl/compile.go +++ b/cl/compile.go @@ -476,6 +476,14 @@ func (p *context) compileFuncDecl(pkg llssa.Package, f *ssa.Function) (llssa.Fun p.funcs[f] = fn isCgo := isCgoExternSymbol(f) if nblk := len(f.Blocks); nblk > 0 { + if p.prog.FuncInfoMetadataEnabled() { + goName := fn.Name() + if pkgTypes != nil { + goName = funcName(pkgTypes, f, false) + } + pos := p.goProg.Fset.Position(f.Pos()) + pkg.EmitFuncInfo(fn.Name(), goName, pos.Filename, pos.Line, pos.Column) + } var childInits []func() if len(f.AnonFuncs) > 0 { parentInits := p.inits diff --git a/cl/funcinfo_metadata_test.go b/cl/funcinfo_metadata_test.go new file mode 100644 index 0000000000..5b39e3a2cc --- /dev/null +++ b/cl/funcinfo_metadata_test.go @@ -0,0 +1,142 @@ +//go:build !llgo +// +build !llgo + +/* + * Copyright (c) 2024 The XGo Authors (xgo.dev). All rights reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package cl_test + +import ( + "regexp" + "strconv" + "strings" + "testing" + + "github.com/goplus/llgo/cl/cltest" + llssa "github.com/goplus/llgo/ssa" +) + +type funcInfoRecord struct { + symbol string + name string + file string + line int + column int +} + +func TestFuncInfoMetadataEmission(t *testing.T) { + const src = `package foo + +type T struct{} + +func top() { + _ = func() int { return leaf() }() +} + +func leaf() int { return 1 } + +func (T) method() {} +` + ir := cltest.CompileIREx(t, src, "foo.go", false, func(prog llssa.Program) { + prog.EnableFuncInfoMetadata(true) + }) + + for _, want := range []string{ + `!llgo.funcinfo = !{!`, + `!"foo.top"`, + `!"foo.top$1"`, + `!"foo.T.method"`, + `!"foo.go"`, + } { + if !strings.Contains(ir, want) { + t.Fatalf("missing funcinfo metadata %s:\n%s", want, ir) + } + } + if strings.Contains(ir, "llvm.compiler.used") { + t.Fatalf("funcinfo metadata should not add llvm.compiler.used:\n%s", ir) + } + if strings.Contains(ir, `ptr @"foo.top"`) || strings.Contains(ir, `ptr @foo.top`) { + t.Fatalf("funcinfo metadata should use symbol strings, not function pointers:\n%s", ir) + } + + records := parseFuncInfoRecords(t, ir) + stackSymbols := []string{"foo.leaf", "foo.top$1", "foo.top"} + for _, symbol := range stackSymbols { + record, ok := records[symbol] + if !ok { + t.Fatalf("stack symbol %q not found in funcinfo metadata: %#v", symbol, records) + } + if record.name == "" || record.file != "foo.go" || record.line <= 0 || record.column <= 0 { + t.Fatalf("bad funcinfo for stack symbol %q: %#v", 
symbol, record) + } + } + if got := records["foo.leaf"].name; got != "foo.leaf" { + t.Fatalf("leaf stack frame name = %q, want foo.leaf", got) + } + if got := records["foo.top$1"].name; got != "foo.top$1" { + t.Fatalf("closure stack frame name = %q, want foo.top$1", got) + } + if got := records["foo.top"].name; got != "foo.top" { + t.Fatalf("caller stack frame name = %q, want foo.top", got) + } +} + +func parseFuncInfoRecords(t *testing.T, ir string) map[string]funcInfoRecord { + t.Helper() + + listRE := regexp.MustCompile(`!llgo\.funcinfo = !\{([^}]*)\}`) + listMatch := listRE.FindStringSubmatch(ir) + if listMatch == nil { + t.Fatalf("missing funcinfo metadata list:\n%s", ir) + } + refRE := regexp.MustCompile(`!(\d+)`) + refs := refRE.FindAllStringSubmatch(listMatch[1], -1) + if len(refs) == 0 { + t.Fatalf("empty funcinfo metadata list:\n%s", ir) + } + wantRefs := make(map[string]bool, len(refs)) + for _, ref := range refs { + wantRefs[ref[1]] = true + } + + rowRE := regexp.MustCompile(`^!(\d+) = !\{i32 1, !"([^"]+)", !"([^"]+)", !"([^"]*)", i32 ([0-9]+), i32 ([0-9]+)\}$`) + records := make(map[string]funcInfoRecord) + for _, line := range strings.Split(ir, "\n") { + row := rowRE.FindStringSubmatch(line) + if row == nil || !wantRefs[row[1]] { + continue + } + lineNo, err := strconv.Atoi(row[5]) + if err != nil { + t.Fatalf("bad funcinfo line in %q: %v", line, err) + } + column, err := strconv.Atoi(row[6]) + if err != nil { + t.Fatalf("bad funcinfo column in %q: %v", line, err) + } + records[row[2]] = funcInfoRecord{ + symbol: row[2], + name: row[3], + file: row[4], + line: lineNo, + column: column, + } + } + if len(records) != len(wantRefs) { + t.Fatalf("parsed %d funcinfo records, want %d:\n%s", len(records), len(wantRefs), ir) + } + return records +} diff --git a/ssa/funcinfo.go b/ssa/funcinfo.go new file mode 100644 index 0000000000..734399093d --- /dev/null +++ b/ssa/funcinfo.go @@ -0,0 +1,63 @@ +/* + * Copyright (c) 2024 The XGo Authors (xgo.dev). 
All rights reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package ssa + +import "github.com/xgo-dev/llvm" + +const ( + FuncInfoMetadataName = "llgo.funcinfo" + funcInfoVersion = 1 +) + +// EnableFuncInfoMetadata controls emission of DCE-safe function source +// metadata. The metadata intentionally stores symbol names as strings instead +// of function pointer operands, so it can be consumed before materializing a +// final runtime line/func table without keeping otherwise-dead functions alive. +func (p Program) EnableFuncInfoMetadata(enable bool) { + p.enableFuncInfoMetadata = enable +} + +func (p Program) FuncInfoMetadataEnabled() bool { + return p.enableFuncInfoMetadata +} + +// EmitFuncInfo records a function's linker symbol, Go name, and declaration +// source position as LLVM named metadata. 
The row layout is: +// +// !{i32 version, !"symbol", !"go.name", !"file", i32 line, i32 column} +func (p Package) EmitFuncInfo(symbol, name, file string, line, column int) { + if symbol == "" { + return + } + if line < 0 { + line = 0 + } + if column < 0 { + column = 0 + } + i32 := p.Prog.Int32().ll + p.mod.AddNamedMetadataOperand(FuncInfoMetadataName, + p.Prog.ctx.MDNode([]llvm.Metadata{ + llvm.ConstInt(i32, funcInfoVersion, false).ConstantAsMetadata(), + p.Prog.ctx.MDString(symbol), + p.Prog.ctx.MDString(name), + p.Prog.ctx.MDString(file), + llvm.ConstInt(i32, uint64(line), false).ConstantAsMetadata(), + llvm.ConstInt(i32, uint64(column), false).ConstantAsMetadata(), + }), + ) +} diff --git a/ssa/package.go b/ssa/package.go index 74256eddcc..f0f118d639 100644 --- a/ssa/package.go +++ b/ssa/package.go @@ -233,6 +233,8 @@ type aProgram struct { is32Bits bool enableGoGlobalDCE bool + + enableFuncInfoMetadata bool } type AbiSymbol struct { diff --git a/ssa/ssa_test.go b/ssa/ssa_test.go index 74d4242286..378bde70ff 100644 --- a/ssa/ssa_test.go +++ b/ssa/ssa_test.go @@ -200,6 +200,104 @@ func TestNewFuncExLLVMUsed(t *testing.T) { } } +func TestFuncInfoMetadataDoesNotPreserveFunctions(t *testing.T) { + testFuncInfoMetadataDoesNotPreserveFunctions(t) +} + +func testFuncInfoMetadataDoesNotPreserveFunctions(t *testing.T) { + t.Helper() + + prog := NewProgram(nil) + if prog.FuncInfoMetadataEnabled() { + t.Fatal("funcinfo metadata should be disabled by default") + } + prog.EnableFuncInfoMetadata(true) + if !prog.FuncInfoMetadataEnabled() { + t.Fatal("funcinfo metadata should be enabled") + } + + pkg := prog.NewPackage("main", "main") + sig := types.NewSignatureType(nil, nil, nil, nil, nil, false) + + pkg.NewFunc("main.unused", sig, InGo) + pkg.EmitFuncInfo("", "ignored", "ignored.go", -1, -1) + if ir := pkg.String(); strings.Contains(ir, FuncInfoMetadataName) { + t.Fatalf("empty symbol should not emit funcinfo metadata:\n%s", ir) + } + + pkg.EmitFuncInfo("main.unused", 
"main.unused", "unused.go", 7, 1) + pkg.EmitFuncInfo("main.negative", "main.negative", "negative.go", -7, -1) + ir := pkg.String() + + if !strings.Contains(ir, `!llgo.funcinfo = !{!`) { + t.Fatalf("missing %s metadata:\n%s", FuncInfoMetadataName, ir) + } + for _, want := range []string{`!"main.unused"`, `!"unused.go"`, `i32 7`, `!"main.negative"`, `!"negative.go"`, `i32 0`} { + if !strings.Contains(ir, want) { + t.Fatalf("missing funcinfo field %s:\n%s", want, ir) + } + } + if strings.Contains(ir, "llvm.compiler.used") { + t.Fatalf("funcinfo metadata must not preserve symbols with llvm.compiler.used:\n%s", ir) + } + if strings.Contains(ir, `ptr @"main.unused"`) || strings.Contains(ir, `ptr @main.unused`) { + t.Fatalf("funcinfo metadata must not contain function pointer operands:\n%s", ir) + } +} + +func TestFuncInfoMetadataDoesNotBlockGlobalDCE(t *testing.T) { + testFuncInfoMetadataDoesNotBlockGlobalDCE(t) +} + +func testFuncInfoMetadataDoesNotBlockGlobalDCE(t *testing.T) { + t.Helper() + + prog := NewProgram(nil) + pkg := prog.NewPackage("main", "main") + sig := types.NewSignatureType(nil, nil, nil, nil, nil, false) + + live := pkg.NewFunc("main.main", sig, InGo) + lb := live.MakeBody(1) + lb.Return() + lb.EndBuild() + + unused := pkg.NewFuncEx("main.unused", sig, InGo, false, true) + ub := unused.MakeBody(1) + ub.Return() + ub.EndBuild() + pkg.EmitFuncInfo(unused.Name(), unused.Name(), "unused.go", 7, 1) + + mod := pkg.Module() + if mod.NamedFunction("main.unused").IsNil() { + t.Fatal("missing main.unused before DCE") + } + mod.SetDataLayout(prog.DataLayout()) + mod.SetTarget(prog.Target().Spec().Triple) + pbo := llvm.NewPassBuilderOptions() + defer pbo.Dispose() + if err := llvm.VerifyModule(mod, llvm.ReturnStatusAction); err != nil { + t.Fatalf("verify module before DCE: %v", err) + } + if err := mod.RunPasses("globaldce", prog.TargetMachine(), pbo); err != nil { + t.Fatalf("run globaldce: %v", err) + } + if !mod.NamedFunction("main.unused").IsNil() { + 
t.Fatalf("funcinfo metadata kept main.unused alive:\n%s", mod.String()) + } + if mod.NamedFunction("main.main").IsNil() { + t.Fatalf("globaldce removed externally visible live function:\n%s", mod.String()) + } + if ir := mod.String(); !strings.Contains(ir, `!"main.unused"`) { + t.Fatalf("funcinfo metadata should remain available for later materialization:\n%s", ir) + } +} + +func TestDevLTOGlobalDCEFuncInfoMetadata(t *testing.T) { + requireGoGlobalDCE(t) + testFuncInfoMetadataDoesNotPreserveFunctions(t) + testFuncInfoMetadataDoesNotBlockGlobalDCE(t) +} + func requireGoGlobalDCE(t *testing.T) { t.Helper() } From e81a007d2d039c2f081a2e9cd433db718aabd9ef Mon Sep 17 00:00:00 2001 From: Li Jie Date: Mon, 29 Jun 2026 16:08:50 +0800 Subject: [PATCH 03/59] runtime: add line info for stack frames --- cl/cltest/cltest.go | 35 ++- cl/compile.go | 40 ++- cl/funcinfo_metadata_test.go | 17 ++ internal/build/build.go | 23 +- internal/build/build_test.go | 43 +++ internal/build/collect.go | 1 + internal/build/funcinfo/funcinfo.go | 156 ++++++++++ internal/build/funcinfo/funcinfo_test.go | 134 +++++++++ internal/build/funcinfo_table.go | 223 ++++++++++++++ internal/build/funcinfo_table_test.go | 182 +++++++++++ internal/build/main_module.go | 2 + runtime/internal/clite/debug/_wrap/debug.c | 17 +- runtime/internal/lib/runtime/extern.go | 19 +- .../lib/runtime/pprof_runtime_stub_llgo.go | 14 +- runtime/internal/lib/runtime/runtime2.go | 50 ++- runtime/internal/lib/runtime/symtab.go | 284 +++++++++++++++++- ssa/decl.go | 5 + test/go/runtime_lineinfo_stack_test.go | 187 ++++++++++++ test/goroot/xfail.yaml | 4 - 19 files changed, 1403 insertions(+), 33 deletions(-) create mode 100644 internal/build/funcinfo/funcinfo.go create mode 100644 internal/build/funcinfo/funcinfo_test.go create mode 100644 internal/build/funcinfo_table.go create mode 100644 internal/build/funcinfo_table_test.go create mode 100644 test/go/runtime_lineinfo_stack_test.go diff --git a/cl/cltest/cltest.go 
b/cl/cltest/cltest.go index d70ab47672..6a151f67ef 100644 --- a/cl/cltest/cltest.go +++ b/cl/cltest/cltest.go @@ -242,7 +242,10 @@ func testFrom(t *testing.T, pkgDir, sel string) { if spec.Mode == littest.ModeSkip { return } - v := llgen.GenFrom(pkgDir) + var v string + withFuncInfoDisabled(func() { + v = llgen.GenFrom(pkgDir) + }) if spec.Mode == littest.ModeFileCheck { if err := littest.Check(spec, v); err != nil { _ = os.WriteFile(pkgDir+"/result.txt", []byte(v), 0644) @@ -294,7 +297,14 @@ func testRunAndTestFrom(t *testing.T, pkgDir, relPkg, sel string, opts runOption } } - output, err := runWithConf(relPkg, pkgDir, conf) + var output []byte + if checkIR { + withFuncInfoDisabled(func() { + output, err = runWithConf(relPkg, pkgDir, conf) + }) + } else { + output, err = runWithConf(relPkg, pkgDir, conf) + } if err != nil { t.Logf("raw output:\n%s", string(output)) t.Fatalf("run failed: %v\noutput: %s", err, string(output)) @@ -509,6 +519,20 @@ func readIRSpec(pkgDir string) (littest.Spec, bool, error) { return spec, true, nil } +func withFuncInfoDisabled(fn func()) { + const key = "LLGO_FUNCINFO" + old, ok := os.LookupEnv(key) + _ = os.Setenv(key, "0") + defer func() { + if ok { + _ = os.Setenv(key, old) + } else { + _ = os.Unsetenv(key) + } + }() + fn() +} + func filterRunOutput(in []byte) []byte { // Tests compare output with expect.txt. Some toolchain/environment warnings are // inherently machine-specific and should not be part of the golden output. @@ -542,6 +566,13 @@ func filterRunOutput(in []byte) []byte { func CompileIREx(t *testing.T, src any, fname string, dbg bool, configure func(llssa.Program)) string { t.Helper() + // Build.Do configures cl debug globals for full-package builds. Keep the + // single-file compiler assertions independent from any prior build test. 
+ cl.EnableDebug(dbg) + cl.EnableDbgSyms(dbg) + defer cl.EnableDebug(false) + defer cl.EnableDbgSyms(false) + fset := token.NewFileSet() f, err := parser.ParseFile(fset, fname, src, parser.ParseComments) if err != nil { diff --git a/cl/compile.go b/cl/compile.go index 4de05558d0..fe1dbfc8b1 100644 --- a/cl/compile.go +++ b/cl/compile.go @@ -469,9 +469,14 @@ func (p *context) compileFuncDecl(pkg llssa.Package, f *ssa.Function) (llssa.Fun } if fn == nil { fn = pkg.NewFuncEx(name, sig, llssa.Background(ftype), hasCtx, p.needsLinkOnce(f)) - if disableInline { - fn.Inline(llssa.NoInline) - } + } + noInlineDirective := hasNoInlineDirective(f) + runtimeStackNoInline := needsRuntimeStackNoInline(pkgTypes, f) + if disableInline || noInlineDirective || runtimeStackNoInline { + fn.Inline(llssa.NoInline) + } + if noInlineDirective || runtimeStackNoInline { + fn.DisableTailCalls() } p.funcs[f] = fn isCgo := isCgoExternSymbol(f) @@ -568,6 +573,35 @@ func (p *context) compileFuncDecl(pkg llssa.Package, f *ssa.Function) (llssa.Fun return fn, nil, goFunc } +func hasNoInlineDirective(f *ssa.Function) bool { + decl, _ := f.Syntax().(*ast.FuncDecl) + if decl == nil || decl.Doc == nil { + return false + } + for _, c := range decl.Doc.List { + if c.Text == "//go:noinline" { + return true + } + } + return false +} + +func needsRuntimeStackNoInline(pkg *types.Package, f *ssa.Function) bool { + if pkg == nil || f == nil || f.Signature.Recv() != nil { + return false + } + switch pkg.Path() { + case "runtime", "github.com/goplus/llgo/runtime/internal/lib/runtime": + switch f.Name() { + case "Caller", "Callers", "callers": + return true + } + case "github.com/goplus/llgo/runtime/internal/clite/debug": + return f.Name() == "StackTrace" + } + return false +} + func (p *context) getFuncBodyPos(f *ssa.Function) token.Position { if f.Object() != nil { if fn, ok := f.Object().(*types.Func); ok && fn.Scope() != nil { diff --git a/cl/funcinfo_metadata_test.go b/cl/funcinfo_metadata_test.go index 
5b39e3a2cc..5319b16751 100644 --- a/cl/funcinfo_metadata_test.go +++ b/cl/funcinfo_metadata_test.go @@ -94,6 +94,23 @@ func (T) method() {} } } +func TestNoInlineDirectiveDisablesTailCalls(t *testing.T) { + const src = `package foo + +func caller() { callee() } + +//go:noinline +func callee() {} +` + ir := cltest.CompileIREx(t, src, "foo.go", false, nil) + if !strings.Contains(ir, `define void @foo.callee()`) { + t.Fatalf("missing callee function:\n%s", ir) + } + if !strings.Contains(ir, `noinline`) || !strings.Contains(ir, `"disable-tail-calls"="true"`) { + t.Fatalf("callee should disable inlining and tail calls:\n%s", ir) + } +} + func parseFuncInfoRecords(t *testing.T, ir string) map[string]funcInfoRecord { t.Helper() diff --git a/internal/build/build.go b/internal/build/build.go index 149411815f..93041b7406 100644 --- a/internal/build/build.go +++ b/internal/build/build.go @@ -318,6 +318,7 @@ func Do(args []string, conf *Config) ([]Package, error) { prog := llssa.NewProgram(target) prog.EnableGoGlobalDCE(conf.goGlobalDCEEnabled()) + prog.EnableFuncInfoMetadata(conf.Mode != ModeGen && IsFuncInfoEnabled()) sizes := func(sizes types.Sizes, compiler, arch string) types.Sizes { if arch == "wasm" { sizes = &types.StdSizes{WordSize: 4, MaxAlign: 4} @@ -1050,6 +1051,7 @@ func linkMainPkg(ctx *context, pkg *packages.Package, pkgs []*aPackage, outputPa methodByIndex: methodByIndex, methodByName: methodByName, abiSymbols: linkedModuleGlobals(linkedOrder), + funcInfo: prepareFuncInfoTableRecords(collectFuncInfo(linkedOrder), nil), }) entryObjFile, err := exportObject(ctx, "entry_main", entryPkg.ExportFile, entryPkg.LPkg) if err != nil { @@ -1130,9 +1132,9 @@ func linkObjFiles(ctx *context, app string, objFiles, linkArgs []string, verbose if needsLinuxNoPIE(ctx, linkArgs) { buildArgs = append(buildArgs, "-no-pie") } + buildArgs = append(buildArgs, linuxExportDynamicArgs(ctx)...) 
} - // Add common linker arguments based on target OS and architecture if IsDbgSymsEnabled() { buildArgs = append(buildArgs, "-gdwarf-4") } @@ -1178,6 +1180,20 @@ func needsLinuxNoPIE(ctx *context, linkArgs []string) bool { return true } +func needsLinuxExportDynamic(ctx *context) bool { + return ctx.buildConf.Target == "" && ctx.buildConf.Goos == "linux" && IsFuncInfoEnabled() +} + +func linuxExportDynamicArgs(ctx *context) []string { + if !needsLinuxExportDynamic(ctx) { + return nil + } + return []string{ + "-Wl,--export-dynamic-symbol=main.*", + "-Wl,--export-dynamic-symbol=command-line-arguments.*", + } +} + // archiver returns the archiving tool to use for the current context. // For wasm targets and LTO builds, it prefers llvm-ar because linkers need // LLVM-aware archive indexes for wasm objects and bitcode members. @@ -1796,6 +1812,7 @@ var ( const llgoDebug = "LLGO_DEBUG" const llgoDbgSyms = "LLGO_DEBUG_SYMBOLS" +const llgoFuncInfo = "LLGO_FUNCINFO" const llgoTrace = "LLGO_TRACE" const llgoOptimize = "LLGO_OPTIMIZE" const llgoWasmRuntime = "LLGO_WASM_RUNTIME" @@ -1843,6 +1860,10 @@ func IsDbgEnabled() bool { return isEnvOn(llgoDebug, false) || isEnvOn(llgoDbgSyms, false) } +func IsFuncInfoEnabled() bool { + return isEnvOn(llgoFuncInfo, true) +} + func IsDbgSymsEnabled() bool { return isEnvOn(llgoDbgSyms, false) } diff --git a/internal/build/build_test.go b/internal/build/build_test.go index 51401c2131..bc6f89d785 100644 --- a/internal/build/build_test.go +++ b/internal/build/build_test.go @@ -55,6 +55,49 @@ func TestNeedsLinuxNoPIE(t *testing.T) { } } +func TestNeedsLinuxExportDynamic(t *testing.T) { + t.Setenv(llgoFuncInfo, "") + ctx := &context{buildConf: &Config{Goos: "linux"}} + if !needsLinuxExportDynamic(ctx) { + t.Fatal("linux funcinfo executable should export dynamic symbols") + } + if got := linuxExportDynamicArgs(ctx); strings.Join(got, " ") != "-Wl,--export-dynamic-symbol=main.* -Wl,--export-dynamic-symbol=command-line-arguments.*" { + 
t.Fatalf("linuxExportDynamicArgs = %v", got) + } + t.Setenv(llgoFuncInfo, "0") + if needsLinuxExportDynamic(ctx) { + t.Fatal("LLGO_FUNCINFO=0 should disable dynamic symbol export") + } + if got := linuxExportDynamicArgs(ctx); got != nil { + t.Fatalf("disabled linuxExportDynamicArgs = %v, want nil", got) + } + t.Setenv(llgoFuncInfo, "1") + ctx.buildConf.Goos = "darwin" + if needsLinuxExportDynamic(ctx) { + t.Fatal("non-linux executable should not export dynamic symbols for funcinfo") + } + ctx.buildConf.Goos = "linux" + ctx.buildConf.Target = "wasi" + if needsLinuxExportDynamic(ctx) { + t.Fatal("named targets should not force host linux dynamic symbol export") + } +} + +func TestIsFuncInfoEnabled(t *testing.T) { + t.Setenv(llgoFuncInfo, "") + if !IsFuncInfoEnabled() { + t.Fatal("funcinfo should be enabled by default") + } + t.Setenv(llgoFuncInfo, "0") + if IsFuncInfoEnabled() { + t.Fatal("LLGO_FUNCINFO=0 should disable funcinfo") + } + t.Setenv(llgoFuncInfo, "1") + if !IsFuncInfoEnabled() { + t.Fatal("LLGO_FUNCINFO=1 should enable funcinfo") + } +} + func mockRun(args []string, cfg *Config) { defer mockable.DisableMock() mockable.EnableMock() diff --git a/internal/build/collect.go b/internal/build/collect.go index ab6d7076f6..dd30daf72b 100644 --- a/internal/build/collect.go +++ b/internal/build/collect.go @@ -82,6 +82,7 @@ func (c *context) collectEnvInputs(m *manifestBuilder) { envVars := []string{ llgoDebug, llgoDbgSyms, + llgoFuncInfo, llgoTrace, llgoOptimize, llgoWasmRuntime, diff --git a/internal/build/funcinfo/funcinfo.go b/internal/build/funcinfo/funcinfo.go new file mode 100644 index 0000000000..c092b606e9 --- /dev/null +++ b/internal/build/funcinfo/funcinfo.go @@ -0,0 +1,156 @@ +/* + * Copyright (c) 2026 The XGo Authors (xgo.dev). All rights reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
// Record is the unencoded description of one function: the symbol it is
// looked up by, a name, and the source position recorded for it.
type Record struct {
	Symbol string // key that buildHash hashes into Table.Hash
	Name   string
	File   string
	Line   uint32
	Column uint32
}

// EncodedRecord is the fixed-width form of a Record. Symbol, Name and File
// are byte offsets into Table.Strings; Line and Column are copied verbatim.
type EncodedRecord struct {
	Symbol uint32
	Name   uint32
	File   uint32
	Line   uint32
	Column uint32
}

// Table is the result of Encode.
type Table struct {
	// Records holds one entry per input record, in input order.
	Records []EncodedRecord
	// Strings is the pool of NUL-terminated strings. Byte 0 is a lone NUL, so
	// offset 0 denotes the empty string.
	Strings []byte
	// Hash is a power-of-two sized open-addressing table keyed by
	// HashString(Symbol). A slot holds a 1-based index into Records, or 0
	// when it is empty.
	Hash []uint32
}

// Encode converts records into a Table: every distinct string is stored once
// in a shared pool (a string that is a suffix of an already stored one reuses
// its bytes), and a symbol hash is built over the records.
//
// An empty input yields the zero Table. An error is returned when the string
// pool would not be addressable with 32-bit offsets, or when there are more
// records than a 1-based uint32 hash index can name.
func Encode(records []Record) (Table, error) {
	if len(records) == 0 {
		return Table{}, nil
	}
	// Hash slots store i+1 as uint32 and reserve 0 for "empty". The uint64
	// conversion keeps the comparison compilable where int is 32 bits wide.
	if uint64(len(records)) > math.MaxUint32 {
		return Table{}, fmt.Errorf("funcinfo record count %d exceeds uint32 index range", len(records))
	}
	pool := newStringPool()
	for _, s := range collectStrings(records) {
		if _, err := pool.offset(s); err != nil {
			return Table{}, err
		}
	}
	out := Table{
		Records: make([]EncodedRecord, 0, len(records)),
	}
	for i := range records {
		rec := &records[i]
		out.Records = append(out.Records, EncodedRecord{
			Symbol: pool.offsets[rec.Symbol],
			Name:   pool.offsets[rec.Name],
			File:   pool.offsets[rec.File],
			Line:   rec.Line,
			Column: rec.Column,
		})
	}
	out.Strings = []byte(pool.text.String())
	out.Hash = buildHash(records)
	return out, nil
}

// collectStrings returns every distinct non-empty Symbol, Name and File,
// longest first (ties broken lexicographically). Inserting longer strings
// first is what lets stringPool.offset find a shorter string as the tail of
// one that is already stored; the tie-break keeps the layout deterministic.
func collectStrings(records []Record) []string {
	seen := make(map[string]bool)
	for i := range records {
		seen[records[i].Symbol] = true
		seen[records[i].Name] = true
		seen[records[i].File] = true
	}
	delete(seen, "")
	out := make([]string, 0, len(seen))
	for s := range seen {
		out = append(out, s)
	}
	sort.Slice(out, func(i, j int) bool {
		if len(out[i]) != len(out[j]) {
			return len(out[i]) > len(out[j])
		}
		return out[i] < out[j]
	})
	return out
}

// stringPool accumulates NUL-terminated strings and remembers the offset
// assigned to each one.
type stringPool struct {
	offsets map[string]uint32
	// text is the pool itself. A strings.Builder is used because its String
	// method exposes the accumulated bytes without copying them, so searching
	// the pool does not require re-materialising it after every append.
	text strings.Builder
}

// newStringPool returns a pool that already contains the single NUL byte at
// offset 0, which represents the empty string.
func newStringPool() *stringPool {
	p := &stringPool{offsets: map[string]uint32{"": 0}}
	p.text.WriteByte(0)
	return p
}

// offset returns the pool offset of s, adding s to the pool when it is not
// already present either as a whole entry or as the tail of an earlier entry.
// It fails when appending s would push the pool past what a uint32 offset can
// address.
func (p *stringPool) offset(s string) (uint32, error) {
	if off, ok := p.offsets[s]; ok {
		return off, nil
	}
	// Searching for s followed by its terminator only matches the end of a
	// stored string, which is exactly the suffix-sharing case.
	if off := strings.Index(p.text.String(), s+"\x00"); off >= 0 {
		uoff := uint32(off)
		p.offsets[s] = uoff
		return uoff, nil
	}
	// Compare in uint64: math.MaxUint32 is not representable as int on
	// 32-bit platforms, so an int comparison would not compile there.
	if uint64(p.text.Len())+uint64(len(s))+1 > math.MaxUint32 {
		return 0, fmt.Errorf("funcinfo string table exceeds 4 GiB")
	}
	off := uint32(p.text.Len())
	p.text.WriteString(s)
	p.text.WriteByte(0)
	p.offsets[s] = off
	return off, nil
}

// buildHash returns an open-addressing hash table over records keyed by
// HashString(Symbol). The bucket count is the smallest power of two that
// keeps the load at or below 3/4; collisions are resolved by linear probing.
// Each occupied slot holds the record's index plus one, and 0 means empty.
func buildHash(records []Record) []uint32 {
	if len(records) == 0 {
		return nil
	}
	buckets := 1
	for buckets*3 < len(records)*4 {
		buckets <<= 1
	}
	hash := make([]uint32, buckets)
	for i := range records {
		slot := int(HashString(records[i].Symbol) & uint32(buckets-1))
		// The load bound above guarantees a free slot, so this terminates.
		for hash[slot] != 0 {
			slot = (slot + 1) & (buckets - 1)
		}
		hash[slot] = uint32(i + 1)
	}
	return hash
}

// HashString returns the 32-bit FNV-1a hash of s.
func HashString(s string) uint32 {
	const (
		offset = uint32(2166136261)
		prime  = uint32(16777619)
	)
	h := offset
	for i := 0; i < len(s); i++ {
		h ^= uint32(s[i])
		h *= prime
	}
	return h
}
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package funcinfo + +import "testing" + +func TestEncodePoolsStringsAndBuildsHash(t *testing.T) { + table, err := Encode([]Record{ + {Symbol: "example.com/p.a", Name: "example.com/p.A", File: "/src/p/shared.go", Line: 10, Column: 1}, + {Symbol: "example.com/p.b", Name: "example.com/p.B", File: "shared.go", Line: 20, Column: 2}, + }) + if err != nil { + t.Fatal(err) + } + if len(table.Records) != 2 { + t.Fatalf("encoded records = %d, want 2", len(table.Records)) + } + if table.Records[0].File == table.Records[1].File { + t.Fatalf("suffix sharing should not collapse distinct file strings to the same offset") + } + if got := cstring(table.Strings, table.Records[1].File); got != "shared.go" { + t.Fatalf("suffix file string = %q, want shared.go", got) + } + if len(table.Hash) == 0 || len(table.Hash)&(len(table.Hash)-1) != 0 { + t.Fatalf("hash bucket count = %d, want power-of-two non-zero", len(table.Hash)) + } + if idx, ok := lookup(table, "example.com/p.a"); !ok || idx != 0 { + t.Fatalf("lookup a = %d, %v; want 0, true", idx, ok) + } + if idx, ok := lookup(table, "example.com/p.b"); !ok || idx != 1 { + t.Fatalf("lookup b = %d, %v; want 1, true", idx, ok) + } + if _, ok := lookup(table, "missing"); ok { + t.Fatalf("lookup missing succeeded") + } +} + +func TestEncodeUsesUint32Records(t *testing.T) { + table, err := Encode([]Record{{Symbol: "s", Name: "n", File: "f", Line: 1, Column: 2}}) + if err != nil { + t.Fatal(err) + } + if got, want := len(table.Records), 1; got != want { + t.Fatalf("records = %d, want %d", got, want) + } + rec := table.Records[0] + if got, want := cstring(table.Strings, rec.Symbol), "s"; got != want { + t.Fatalf("symbol = %q, want %q", got, want) + } + if got, want := cstring(table.Strings, rec.Name), "n"; got != want { + t.Fatalf("name = %q, want %q", got, want) + } + if got, want := cstring(table.Strings, rec.File), "f"; got != want 
{ + t.Fatalf("file = %q, want %q", got, want) + } + if rec.Line != 1 || rec.Column != 2 { + t.Fatalf("source position = %d:%d, want 1:2", rec.Line, rec.Column) + } +} + +func TestEncodeHashHandlesCollisions(t *testing.T) { + a, b := collisionPair(t) + table, err := Encode([]Record{ + {Symbol: a, Name: a, File: "a.go"}, + {Symbol: b, Name: b, File: "b.go"}, + }) + if err != nil { + t.Fatal(err) + } + if idx, ok := lookup(table, a); !ok || idx != 0 { + t.Fatalf("lookup collision a = %d, %v; want 0, true", idx, ok) + } + if idx, ok := lookup(table, b); !ok || idx != 1 { + t.Fatalf("lookup collision b = %d, %v; want 1, true", idx, ok) + } +} + +func collisionPair(t *testing.T) (string, string) { + t.Helper() + const mask = uint32(3) + seen := make(map[uint32]string) + for i := 0; i < 100; i++ { + s := string(rune('a' + i)) + slot := HashString(s) & mask + if prev, ok := seen[slot]; ok { + return prev, s + } + seen[slot] = s + } + t.Fatal("failed to find hash collision") + return "", "" +} + +func cstring(data []byte, off uint32) string { + end := int(off) + for end < len(data) && data[end] != 0 { + end++ + } + return string(data[off:end]) +} + +func lookup(table Table, symbol string) (int, bool) { + if len(table.Hash) == 0 { + return 0, false + } + mask := uint32(len(table.Hash) - 1) + slot := HashString(symbol) & mask + for probes := 0; probes < len(table.Hash); probes++ { + idx := table.Hash[slot] + if idx == 0 { + return 0, false + } + rec := table.Records[idx-1] + if cstring(table.Strings, rec.Symbol) == symbol { + return int(idx - 1), true + } + slot = (slot + 1) & mask + } + return 0, false +} diff --git a/internal/build/funcinfo_table.go b/internal/build/funcinfo_table.go new file mode 100644 index 0000000000..30c63c5fe4 --- /dev/null +++ b/internal/build/funcinfo_table.go @@ -0,0 +1,223 @@ +/* + * Copyright (c) 2026 The XGo Authors (xgo.dev). All rights reserved. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package build + +import ( + "sort" + + "github.com/xgo-dev/llvm" + + buildfuncinfo "github.com/goplus/llgo/internal/build/funcinfo" + llssa "github.com/goplus/llgo/ssa" +) + +const ( + funcInfoTableSymbol = "__llgo_funcinfo_table" + funcInfoCountSymbol = "__llgo_funcinfo_count" + funcInfoStringsSymbol = "__llgo_funcinfo_strings" + funcInfoHashSymbol = "__llgo_funcinfo_hash" + funcInfoHashMaskSymbol = "__llgo_funcinfo_hash_mask" + funcInfoDataSymbol = "__llgo_funcinfo_table$data" + funcInfoStringsDataSymbol = "__llgo_funcinfo_strings$data" + funcInfoHashDataSymbol = "__llgo_funcinfo_hash$data" +) + +type funcInfoRecord struct { + symbol string + name string + file string + line uint32 + column uint32 +} + +func collectFuncInfo(pkgs []Package) []funcInfoRecord { + seen := make(map[string]funcInfoRecord) + for _, pkg := range pkgs { + if pkg == nil || pkg.LPkg == nil { + continue + } + for _, rec := range readFuncInfo(pkg.LPkg.Module()) { + if rec.symbol == "" { + continue + } + if _, ok := seen[rec.symbol]; !ok { + seen[rec.symbol] = rec + } + } + } + if len(seen) == 0 { + return nil + } + out := make([]funcInfoRecord, 0, len(seen)) + for _, rec := range seen { + out = append(out, rec) + } + sort.Slice(out, func(i, j int) bool { + return out[i].symbol < out[j].symbol + }) + return out +} + +func prepareFuncInfoTableRecords(records []funcInfoRecord, liveSymbols map[string]none) []funcInfoRecord { + if 
len(records) == 0 { + return nil + } + // A nil liveSymbols means no post-DCE live symbol set is available yet. + // The current table is still DCE-compatible because it stores only strings, + // never function pointers or llvm.compiler.used references. Once the linker + // or an LTO hook exposes a live-symbol set, pass it here to drop metadata for + // functions removed by global DCE before materializing the runtime table. + if liveSymbols == nil { + return records + } + out := records[:0] + for _, rec := range records { + if _, ok := liveSymbols[rec.symbol]; ok { + out = append(out, rec) + } + } + if len(out) == 0 { + return nil + } + return out +} + +func readFuncInfo(mod llvm.Module) []funcInfoRecord { + rows := mod.NamedMetadataOperands(llssa.FuncInfoMetadataName) + if len(rows) == 0 { + return nil + } + out := make([]funcInfoRecord, 0, len(rows)) + for _, row := range rows { + fields := row.MDNodeOperands() + if len(fields) != 6 || fields[0].ZExtValue() != 1 { + continue + } + if !fields[1].IsAMDString() || !fields[2].IsAMDString() || !fields[3].IsAMDString() { + continue + } + out = append(out, funcInfoRecord{ + symbol: fields[1].MDString(), + name: fields[2].MDString(), + file: fields[3].MDString(), + line: uint32(fields[4].ZExtValue()), + column: uint32(fields[5].ZExtValue()), + }) + } + return out +} + +func emitFuncInfoTable(ctx *context, pkg llssa.Package, records []funcInfoRecord) { + mod := pkg.Module() + llvmCtx := mod.Context() + i8Type := llvmCtx.Int8Type() + i32Type := llvmCtx.Int32Type() + countType := llvmCtx.IntType(ctx.prog.PointerSize() * 8) + recordType := llvmCtx.StructType([]llvm.Type{ + i32Type, + i32Type, + i32Type, + i32Type, + i32Type, + }, false) + + tablePtr := llvm.AddGlobal(mod, llvm.PointerType(recordType, 0), funcInfoTableSymbol) + stringsPtr := llvm.AddGlobal(mod, llvm.PointerType(i8Type, 0), funcInfoStringsSymbol) + hashPtr := llvm.AddGlobal(mod, llvm.PointerType(i32Type, 0), funcInfoHashSymbol) + count := llvm.AddGlobal(mod, 
countType, funcInfoCountSymbol) + hashMask := llvm.AddGlobal(mod, countType, funcInfoHashMaskSymbol) + if len(records) == 0 { + tablePtr.SetInitializer(llvm.ConstPointerNull(tablePtr.GlobalValueType())) + stringsPtr.SetInitializer(llvm.ConstPointerNull(stringsPtr.GlobalValueType())) + hashPtr.SetInitializer(llvm.ConstPointerNull(hashPtr.GlobalValueType())) + count.SetInitializer(llvm.ConstInt(countType, 0, false)) + hashMask.SetInitializer(llvm.ConstInt(countType, 0, false)) + return + } + + encoded, err := buildfuncinfo.Encode(toFuncInfoRecords(records)) + if err != nil { + panic(err) + } + + values := make([]llvm.Value, 0, len(encoded.Records)) + for _, rec := range encoded.Records { + values = append(values, llvm.ConstNamedStruct(recordType, []llvm.Value{ + llvm.ConstInt(i32Type, uint64(rec.Symbol), false), + llvm.ConstInt(i32Type, uint64(rec.Name), false), + llvm.ConstInt(i32Type, uint64(rec.File), false), + llvm.ConstInt(i32Type, uint64(rec.Line), false), + llvm.ConstInt(i32Type, uint64(rec.Column), false), + })) + } + arrayType := llvm.ArrayType(recordType, len(values)) + data := llvm.AddGlobal(mod, arrayType, funcInfoDataSymbol) + data.SetInitializer(llvm.ConstArray(recordType, values)) + data.SetLinkage(llvm.PrivateLinkage) + data.SetGlobalConstant(true) + data.SetUnnamedAddr(true) + data.SetAlignment(4) + + stringArrayType := llvm.ArrayType(i8Type, len(encoded.Strings)) + stringData := llvm.AddGlobal(mod, stringArrayType, funcInfoStringsDataSymbol) + stringData.SetInitializer(llvmCtx.ConstString(string(encoded.Strings), false)) + stringData.SetLinkage(llvm.PrivateLinkage) + stringData.SetGlobalConstant(true) + stringData.SetUnnamedAddr(true) + stringData.SetAlignment(1) + + hashValues := make([]llvm.Value, 0, len(encoded.Hash)) + for _, idx := range encoded.Hash { + hashValues = append(hashValues, llvm.ConstInt(i32Type, uint64(idx), false)) + } + hashArrayType := llvm.ArrayType(i32Type, len(hashValues)) + hashData := llvm.AddGlobal(mod, hashArrayType, 
funcInfoHashDataSymbol) + hashData.SetInitializer(llvm.ConstArray(i32Type, hashValues)) + hashData.SetLinkage(llvm.PrivateLinkage) + hashData.SetGlobalConstant(true) + hashData.SetUnnamedAddr(true) + hashData.SetAlignment(4) + + tablePtr.SetInitializer(llvm.ConstInBoundsGEP(arrayType, data, []llvm.Value{ + llvm.ConstInt(countType, 0, false), + llvm.ConstInt(countType, 0, false), + })) + stringsPtr.SetInitializer(llvm.ConstInBoundsGEP(stringArrayType, stringData, []llvm.Value{ + llvm.ConstInt(countType, 0, false), + llvm.ConstInt(countType, 0, false), + })) + hashPtr.SetInitializer(llvm.ConstInBoundsGEP(hashArrayType, hashData, []llvm.Value{ + llvm.ConstInt(countType, 0, false), + llvm.ConstInt(countType, 0, false), + })) + count.SetInitializer(llvm.ConstInt(countType, uint64(len(encoded.Records)), false)) + hashMask.SetInitializer(llvm.ConstInt(countType, uint64(len(encoded.Hash)-1), false)) +} + +func toFuncInfoRecords(records []funcInfoRecord) []buildfuncinfo.Record { + out := make([]buildfuncinfo.Record, len(records)) + for i, rec := range records { + out[i] = buildfuncinfo.Record{ + Symbol: rec.symbol, + Name: rec.name, + File: rec.file, + Line: rec.line, + Column: rec.column, + } + } + return out +} diff --git a/internal/build/funcinfo_table_test.go b/internal/build/funcinfo_table_test.go new file mode 100644 index 0000000000..f7367ebf56 --- /dev/null +++ b/internal/build/funcinfo_table_test.go @@ -0,0 +1,182 @@ +/* + * Copyright (c) 2026 The XGo Authors (xgo.dev). All rights reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package build + +import ( + "strings" + "testing" + + "github.com/xgo-dev/llvm" + + "github.com/goplus/llgo/internal/packages" + llssa "github.com/goplus/llgo/ssa" +) + +func TestFuncInfoTableMaterializesMetadataWithoutFunctionPointers(t *testing.T) { + prog := llssa.NewProgram(nil) + src := prog.NewPackage("example.com/p", "example.com/p") + src.EmitFuncInfo("example.com/p.live", "example.com/p.Live", "live.go", 17, 3) + src.EmitFuncInfo("example.com/p.live", "example.com/p.LiveDuplicate", "dup.go", 19, 1) + + records := collectFuncInfo([]Package{{LPkg: src}}) + if len(records) != 1 { + t.Fatalf("collectFuncInfo returned %d records, want 1", len(records)) + } + if got := records[0]; got.symbol != "example.com/p.live" || got.name != "example.com/p.Live" || got.file != "live.go" || got.line != 17 || got.column != 3 { + t.Fatalf("unexpected record: %+v", got) + } + + ctx := &context{ + prog: prog, + buildConf: &Config{ + BuildMode: BuildModeExe, + Goos: "linux", + Goarch: "amd64", + }, + } + entry := genMainModule(ctx, llssa.PkgRuntime, &packages.Package{ + PkgPath: "example.com/main", + ExportFile: "main.a", + }, &genConfig{funcInfo: records}) + ir := entry.LPkg.String() + for _, want := range []string{ + "@__llgo_funcinfo_table = global ptr", + "@__llgo_funcinfo_strings = global ptr", + "@__llgo_funcinfo_hash = global ptr", + "@__llgo_funcinfo_count = global i64 1", + "@__llgo_funcinfo_hash_mask = global i64 1", + `@"__llgo_funcinfo_table$data" = private unnamed_addr constant [1 x { i32, i32, i32, i32, i32 }]`, + `@"__llgo_funcinfo_strings$data" = private unnamed_addr constant [47 x i8]`, + `@"__llgo_funcinfo_hash$data" = private unnamed_addr constant [2 x i32]`, + `example.com/p.live\00`, + `example.com/p.Live\00`, + `live.go\00`, + "i32 17", + "i32 3", + } { + if !strings.Contains(ir, want) { + t.Fatalf("funcinfo table IR missing %q:\n%s", want, ir) 
+ } + } + if strings.Contains(ir, `ptr @"example.com/p.live"`) { + t.Fatalf("funcinfo table must not reference function pointers:\n%s", ir) + } +} + +func TestPrepareFuncInfoTableRecordsFiltersLiveSymbols(t *testing.T) { + records := []funcInfoRecord{ + {symbol: "dead", name: "dead"}, + {symbol: "live", name: "live"}, + } + if got := prepareFuncInfoTableRecords(nil, nil); got != nil { + t.Fatalf("empty records = %+v, want nil", got) + } + if got := prepareFuncInfoTableRecords(records, nil); len(got) != 2 { + t.Fatalf("nil live set kept %d records, want 2", len(got)) + } + got := prepareFuncInfoTableRecords(records, map[string]none{"live": {}}) + if len(got) != 1 || got[0].symbol != "live" { + t.Fatalf("filtered records = %+v, want live only", got) + } + if got := prepareFuncInfoTableRecords(records, map[string]none{}); got != nil { + t.Fatalf("empty live set = %+v, want nil", got) + } +} + +func TestFuncInfoTablePoolsRepeatedStrings(t *testing.T) { + prog := llssa.NewProgram(nil) + records := []funcInfoRecord{ + {symbol: "example.com/p.a", name: "example.com/p.A", file: "shared.go", line: 10}, + {symbol: "example.com/p.b", name: "example.com/p.B", file: "shared.go", line: 20}, + } + ctx := &context{ + prog: prog, + buildConf: &Config{ + BuildMode: BuildModeExe, + Goos: "linux", + Goarch: "amd64", + }, + } + entry := genMainModule(ctx, llssa.PkgRuntime, &packages.Package{ + PkgPath: "example.com/main", + ExportFile: "main.a", + }, &genConfig{funcInfo: records}) + if got := strings.Count(entry.LPkg.String(), `shared.go\00`); got != 1 { + t.Fatalf("shared file string emitted %d times, want 1", got) + } +} + +func TestFuncInfoTableEmptyDefinitions(t *testing.T) { + prog := llssa.NewProgram(nil) + ctx := &context{ + prog: prog, + buildConf: &Config{ + BuildMode: BuildModeExe, + Goos: "linux", + Goarch: "amd64", + }, + } + entry := genMainModule(ctx, llssa.PkgRuntime, &packages.Package{ + PkgPath: "example.com/main", + ExportFile: "main.a", + }, &genConfig{}) + ir := 
entry.LPkg.String() + for _, want := range []string{ + "@__llgo_funcinfo_table = global ptr null", + "@__llgo_funcinfo_strings = global ptr null", + "@__llgo_funcinfo_hash = global ptr null", + "@__llgo_funcinfo_count = global i64 0", + "@__llgo_funcinfo_hash_mask = global i64 0", + } { + if !strings.Contains(ir, want) { + t.Fatalf("empty funcinfo table IR missing %q:\n%s", want, ir) + } + } +} + +func TestFuncInfoTableIgnoresInvalidMetadata(t *testing.T) { + prog := llssa.NewProgram(nil) + pkg := prog.NewPackage("example.com/p", "example.com/p") + mod := pkg.Module() + ctx := mod.Context() + i32 := ctx.Int32Type() + mdstr := func(s string) llvm.Metadata { return ctx.MDString(s) } + mdint := func(v uint64) llvm.Metadata { + return llvm.ConstInt(i32, v, false).ConstantAsMetadata() + } + add := func(fields ...llvm.Metadata) { + mod.AddNamedMetadataOperand(llssa.FuncInfoMetadataName, ctx.MDNode(fields)) + } + + add(mdstr("short")) + add(mdint(2), mdstr("bad.version"), mdstr("bad.version"), mdstr("bad.go"), mdint(1), mdint(1)) + add(mdint(1), mdint(0), mdstr("bad.symbol"), mdstr("bad.go"), mdint(1), mdint(1)) + add(mdint(1), mdstr(""), mdstr("empty.symbol"), mdstr("empty.go"), mdint(1), mdint(1)) + + if got := readFuncInfo(mod); len(got) != 1 || got[0].symbol != "" { + t.Fatalf("readFuncInfo invalid rows = %+v, want one empty-symbol row", got) + } + if got := collectFuncInfo([]Package{nil, {}, {LPkg: pkg}}); len(got) != 0 { + t.Fatalf("collectFuncInfo invalid rows = %+v, want none", got) + } + + empty := ctx.NewModule("empty") + defer empty.Dispose() + if got := readFuncInfo(empty); got != nil { + t.Fatalf("readFuncInfo(empty) = %+v, want nil", got) + } +} diff --git a/internal/build/main_module.go b/internal/build/main_module.go index d5ac73671e..9f68a976ac 100644 --- a/internal/build/main_module.go +++ b/internal/build/main_module.go @@ -43,6 +43,7 @@ type genConfig struct { methodByIndex map[int]none methodByName map[string]none abiSymbols map[string]none + funcInfo 
[]funcInfoRecord } // genMainModule generates the main entry module for an llgo program. @@ -60,6 +61,7 @@ func genMainModule(ctx *context, rtPkgPath string, pkg *packages.Package, cfg *g argvValueType := prog.Pointer(prog.CStr()) argvVar := mainPkg.NewVarEx("__llgo_argv", prog.Pointer(argvValueType)) argvVar.InitNil() + emitFuncInfoTable(ctx, mainPkg, cfg.funcInfo) exportFile := pkg.ExportFile if exportFile == "" { diff --git a/runtime/internal/clite/debug/_wrap/debug.c b/runtime/internal/clite/debug/_wrap/debug.c index 32d87903bf..cf050c8848 100644 --- a/runtime/internal/clite/debug/_wrap/debug.c +++ b/runtime/internal/clite/debug/_wrap/debug.c @@ -7,6 +7,7 @@ #endif #include +#include #include void *llgo_address() { @@ -14,10 +15,14 @@ void *llgo_address() { } int llgo_addrinfo(void *addr, Dl_info *info) { - return dladdr(addr, info); + int saved_errno = errno; + int ret = dladdr(addr, info); + errno = saved_errno; + return ret; } void llgo_stacktrace(int skip, void *ctx, int (*fn)(void *ctx, void *pc, void *offset, void *sp, char *name)) { + int saved_errno = errno; unw_cursor_t cursor; unw_context_t context; unw_word_t offset, pc, sp; @@ -31,11 +36,17 @@ void llgo_stacktrace(int skip, void *ctx, int (*fn)(void *ctx, void *pc, void *o continue; } if (unw_get_reg(&cursor, UNW_REG_IP, &pc) == 0) { - unw_get_proc_name(&cursor, fname, sizeof(fname), &offset); + fname[0] = 0; + offset = 0; + if (unw_get_proc_name(&cursor, fname, sizeof(fname), &offset) == 0) { + fname[sizeof(fname) - 1] = 0; + } unw_get_reg(&cursor, UNW_REG_SP, &sp); if (fn(ctx, (void*)pc, (void*)offset, (void*)sp, fname) == 0) { + errno = saved_errno; return; } } } -} \ No newline at end of file + errno = saved_errno; +} diff --git a/runtime/internal/lib/runtime/extern.go b/runtime/internal/lib/runtime/extern.go index 1fb397dd8a..377c973876 100644 --- a/runtime/internal/lib/runtime/extern.go +++ b/runtime/internal/lib/runtime/extern.go @@ -9,16 +9,26 @@ import ( ) func Caller(skip int) (pc uintptr, 
file string, line int, ok bool) { - // llgo currently doesn't have reliable source file/line mapping from PC. - // Return a stable placeholder location so stdlib log/testing can proceed. var pcs [1]uintptr - if Callers(skip+1, pcs[:]) < 1 { + if Callers(skip+2, pcs[:]) < 1 { return 0, "", 0, false } - return pcs[0], "???", 1, true + sym := frameSymbol(pcs[0]) + file, line = sym.file, sym.line + if file == "" { + file = "???" + } + if line == 0 { + line = 1 + } + return pcs[0], file, line, true } func Callers(skip int, pc []uintptr) int { + return callers(skip+1, pc) +} + +func callers(skip int, pc []uintptr) int { if len(pc) == 0 { return 0 } @@ -28,6 +38,7 @@ func Callers(skip int, pc []uintptr) int { return false } pc[n] = fr.PC + recordFrameSymbol(fr.PC, fr.Offset, fr.Name) n++ return true }) diff --git a/runtime/internal/lib/runtime/pprof_runtime_stub_llgo.go b/runtime/internal/lib/runtime/pprof_runtime_stub_llgo.go index 5b8155d0e8..bd131c9a25 100644 --- a/runtime/internal/lib/runtime/pprof_runtime_stub_llgo.go +++ b/runtime/internal/lib/runtime/pprof_runtime_stub_llgo.go @@ -85,5 +85,17 @@ func NumGoroutine() int { func SetCPUProfileRate(hz int) {} func FuncForPC(pc uintptr) *Func { - return nil + sym := frameSymbol(pc) + if !sym.ok && sym.function == "" { + return &Func{entry: pc, name: unknownFunctionName(pc)} + } + name := sym.function + if name == "" { + name = unknownFunctionName(pc) + } + entry := sym.entry + if entry == 0 { + entry = pc + } + return &Func{entry: entry, name: name} } diff --git a/runtime/internal/lib/runtime/runtime2.go b/runtime/internal/lib/runtime/runtime2.go index 7327f9892c..8bf049e087 100644 --- a/runtime/internal/lib/runtime/runtime2.go +++ b/runtime/internal/lib/runtime/runtime2.go @@ -17,7 +17,55 @@ type _func struct { } func Stack(buf []byte, all bool) int { - return 0 + var pcs [64]uintptr + n := Callers(0, pcs[:]) + out := make([]byte, 0, 1024) + out = append(out, "goroutine 1 [running]:\n"...) 
+ frames := CallersFrames(pcs[:n]) + for { + frame, more := frames.Next() + if frame.Function == "" { + frame.Function = unknownFunctionName(frame.PC) + } + out = append(out, frame.Function...) + out = append(out, "()\n\t"...) + if frame.File == "" { + out = append(out, "???"...) + } else { + out = append(out, frame.File...) + } + out = append(out, ':') + out = appendInt(out, frame.Line) + out = append(out, ' ') + out = append(out, "+0x0\n"...) + if !more { + break + } + } + if len(out) > len(buf) { + copy(buf, out[:len(buf)]) + return len(buf) + } + copy(buf, out) + return len(out) +} + +func appendInt(out []byte, v int) []byte { + if v == 0 { + return append(out, '0') + } + if v < 0 { + out = append(out, '-') + v = -v + } + var digits [20]byte + i := len(digits) + for v > 0 { + i-- + digits[i] = byte('0' + v%10) + v /= 10 + } + return append(out, digits[i:]...) } type traceError string diff --git a/runtime/internal/lib/runtime/symtab.go b/runtime/internal/lib/runtime/symtab.go index a4d18d9b30..84e04aa943 100644 --- a/runtime/internal/lib/runtime/symtab.go +++ b/runtime/internal/lib/runtime/symtab.go @@ -105,6 +105,249 @@ func unknownFunctionName(pc uintptr) string { return "pc=" + uintptrHex(pc) } +type pcSymbol struct { + pc uintptr + entry uintptr + function string + file string + line int + startLine int + ok bool +} + +type frameSymbolCacheEntry struct { + pc uintptr + offset uintptr + name string +} + +const frameSymbolCacheSize = 128 + +var frameSymbolCache [frameSymbolCacheSize]frameSymbolCacheEntry + +func recordFrameSymbol(pc, offset uintptr, name string) { + if pc == 0 || name == "" { + return + } + i := (pc >> 4) & (frameSymbolCacheSize - 1) + frameSymbolCache[i] = frameSymbolCacheEntry{pc: pc, offset: offset, name: name} +} + +type runtimeFuncInfoRecord struct { + symbol uint32 + name uint32 + file uint32 + line uint32 + column uint32 +} + +//go:linkname runtimeFuncInfoTable __llgo_funcinfo_table +var runtimeFuncInfoTable *runtimeFuncInfoRecord + 
+//go:linkname runtimeFuncInfoStrings __llgo_funcinfo_strings +var runtimeFuncInfoStrings *c.Char + +//go:linkname runtimeFuncInfoHash __llgo_funcinfo_hash +var runtimeFuncInfoHash *uint32 + +//go:linkname runtimeFuncInfoCount __llgo_funcinfo_count +var runtimeFuncInfoCount uintptr + +//go:linkname runtimeFuncInfoHashMask __llgo_funcinfo_hash_mask +var runtimeFuncInfoHashMask uintptr + +func hasStringPrefix(s, prefix string) bool { + if len(s) < len(prefix) { + return false + } + for i := 0; i < len(prefix); i++ { + if s[i] != prefix[i] { + return false + } + } + return true +} + +func publicFunctionName(name string) string { + const commandLineArguments = "command-line-arguments." + if hasStringPrefix(name, commandLineArguments) { + return "main." + name[len(commandLineArguments):] + } + if len(name) > 0 && name[0] == '_' { + name = name[1:] + } + return name +} + +func cStringEqual(cstr *c.Char, s string) bool { + return cStringCompare(cstr, s) == 0 +} + +func cStringCompare(cstr *c.Char, s string) int { + if cstr == nil { + if s == "" { + return 0 + } + return -1 + } + ptr := unsafe.Pointer(cstr) + for i := 0; ; i++ { + c := *(*byte)(unsafe.Add(ptr, i)) + if i == len(s) { + if c == 0 { + return 0 + } + return 1 + } + if c == 0 { + return -1 + } + if c < s[i] { + return -1 + } + if c > s[i] { + return 1 + } + } +} + +func funcInfoCString(off uint32) *c.Char { + if runtimeFuncInfoStrings == nil { + return nil + } + return (*c.Char)(unsafe.Add(unsafe.Pointer(runtimeFuncInfoStrings), uintptr(off))) +} + +func funcInfoAt(i uintptr) *runtimeFuncInfoRecord { + size := unsafe.Sizeof(*runtimeFuncInfoTable) + return (*runtimeFuncInfoRecord)(unsafe.Add(unsafe.Pointer(runtimeFuncInfoTable), i*size)) +} + +func funcInfoHashString(s string) uintptr { + const ( + offset = uint32(2166136261) + prime = uint32(16777619) + ) + h := offset + for i := 0; i < len(s); i++ { + h ^= uint32(s[i]) + h *= prime + } + return uintptr(h) +} + +func funcInfoForSymbol(symbol string) 
*runtimeFuncInfoRecord { + if symbol == "" || runtimeFuncInfoTable == nil || runtimeFuncInfoCount == 0 { + return nil + } + if runtimeFuncInfoHash != nil && runtimeFuncInfoHashMask != 0 { + slot := funcInfoHashString(symbol) & runtimeFuncInfoHashMask + for probes := uintptr(0); probes <= runtimeFuncInfoHashMask; probes++ { + idx := *(*uint32)(unsafe.Add(unsafe.Pointer(runtimeFuncInfoHash), slot*unsafe.Sizeof(*runtimeFuncInfoHash))) + if idx == 0 { + return nil + } + if uintptr(idx) <= runtimeFuncInfoCount { + rec := funcInfoAt(uintptr(idx) - 1) + if cStringEqual(funcInfoCString(rec.symbol), symbol) { + return rec + } + } + slot = (slot + 1) & runtimeFuncInfoHashMask + } + return nil + } + for i := uintptr(0); i < runtimeFuncInfoCount; i++ { + rec := funcInfoAt(i) + if cStringEqual(funcInfoCString(rec.symbol), symbol) { + return rec + } + } + return nil +} + +func applyFuncInfo(sym *pcSymbol, rawFunction string) { + rec := funcInfoForSymbol(rawFunction) + if rec == nil { + public := publicFunctionName(rawFunction) + if public != rawFunction { + rec = funcInfoForSymbol(public) + } + } + if rec == nil { + return + } + if name := safeGoString(funcInfoCString(rec.name), ""); name != "" { + sym.function = publicFunctionName(name) + } + if file := safeGoString(funcInfoCString(rec.file), ""); file != "" { + if sym.file == "" { + sym.file = file + } + } + if rec.line != 0 { + sym.startLine = int(rec.line) + if sym.line == 0 { + sym.line = int(rec.line) + } + } + sym.ok = sym.ok || sym.function != "" || sym.file != "" +} + +func cachedFrameSymbol(pc uintptr) pcSymbol { + i := (pc >> 4) & (frameSymbolCacheSize - 1) + entry := frameSymbolCache[i] + if entry.pc != pc || entry.name == "" { + return pcSymbol{pc: pc} + } + rawFn := entry.name + fn := publicFunctionName(rawFn) + sym := pcSymbol{ + pc: pc, + entry: pc - entry.offset, + function: fn, + ok: fn != "" || entry.offset != 0, + } + applyFuncInfo(&sym, rawFn) + return sym +} + +func addrInfoSymbol(pc uintptr) pcSymbol { + 
var info clitedebug.Info + if clitedebug.Addrinfo(unsafe.Pointer(pc), &info) == 0 { + return cachedFrameSymbol(pc) + } + rawFn := safeGoString(info.Sname, "") + if rawFn == "" { + if sym := cachedFrameSymbol(pc); sym.ok { + return sym + } + } + fn := publicFunctionName(rawFn) + sym := pcSymbol{ + pc: pc, + entry: uintptr(info.Saddr), + function: fn, + ok: fn != "" || info.Saddr != nil, + } + applyFuncInfo(&sym, rawFn) + return sym +} + +func frameSymbol(pc uintptr) pcSymbol { + sym := addrInfoSymbol(pc) + if pc == 0 { + return sym + } + if sym.entry == 0 || pc > sym.entry { + if callSym := addrInfoSymbol(pc - 1); callSym.ok { + callSym.pc = pc + return callSym + } + } + return sym +} + func (ci *Frames) Next() (frame Frame, more bool) { for len(ci.frames) < 2 { // Find the next frame. @@ -119,8 +362,8 @@ func (ci *Frames) Next() (frame Frame, more bool) { } else { pc, ci.callers = ci.callers[0], ci.callers[1:] } - info := &clitedebug.Info{} - if clitedebug.Addrinfo(unsafe.Pointer(pc), info) == 0 { + sym := frameSymbol(pc) + if !sym.ok { ci.frames = append(ci.frames, Frame{ PC: pc, Function: unknownFunctionName(pc), @@ -131,17 +374,22 @@ func (ci *Frames) Next() (frame Frame, more bool) { }) continue } - fn := safeGoString(info.Sname, "") + fn := sym.function if fn == "" { fn = unknownFunctionName(pc) } + var f *Func + if sym.entry != 0 || fn != "" { + f = &Func{entry: sym.entry, name: fn} + } ci.frames = append(ci.frames, Frame{ PC: pc, + Func: f, Function: fn, - File: "", - Line: 0, - startLine: 0, - Entry: uintptr(info.Saddr), + File: sym.file, + Line: sym.line, + startLine: sym.startLine, + Entry: sym.entry, }) } @@ -176,19 +424,27 @@ func CallersFrames(callers []uintptr) *Frames { // A Func represents a Go function in the running binary. 
type Func struct { - opaque struct{} // unexported field to disallow conversions + entry uintptr + name string } func (f *Func) Name() string { - panic("todo") + if f == nil { + return "" + } + return f.name } -func (f *Func) FileLine(pc uintptr) (file string, line int) { - var info clitedebug.Info - if pc == 0 || clitedebug.Addrinfo(unsafe.Pointer(pc), &info) == 0 { - return "", 0 +func (f *Func) Entry() uintptr { + if f == nil { + return 0 } - return safeGoString(info.Fname, ""), 0 + return f.entry +} + +func (f *Func) FileLine(pc uintptr) (file string, line int) { + sym := frameSymbol(pc) + return sym.file, sym.line } // moduledata records information about the layout of the executable diff --git a/ssa/decl.go b/ssa/decl.go index 115bf28fc2..3e17b40e9f 100644 --- a/ssa/decl.go +++ b/ssa/decl.go @@ -423,4 +423,9 @@ func (p Function) Inline(inline inlineAttr) { p.impl.AddFunctionAttr(inlineAttr) } +func (p Function) DisableTailCalls() { + attr := p.Pkg.mod.Context().CreateStringAttribute("disable-tail-calls", "true") + p.impl.AddFunctionAttr(attr) +} + // ----------------------------------------------------------------------------- diff --git a/test/go/runtime_lineinfo_stack_test.go b/test/go/runtime_lineinfo_stack_test.go new file mode 100644 index 0000000000..4d46601112 --- /dev/null +++ b/test/go/runtime_lineinfo_stack_test.go @@ -0,0 +1,187 @@ +/* + * Copyright (c) 2026 The XGo Authors (xgo.dev). All rights reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package gotest + +import ( + "os" + "os/exec" + "path/filepath" + "strconv" + "strings" + "testing" +) + +const runtimeLineInfoProbe = `package main + +import ( + "strconv" + "runtime" + "runtime/debug" + "strings" + _ "unsafe" +) + +func main() { + checkCaller() + checkCallerSkip() + checkFrames() + checkFuncForPC() + checkFuncInfoRename() + checkRuntimeStack() + checkPanicStack() +} + +//go:noinline +func checkCaller() { + _, file, line, ok := runtime.Caller(0) // CALLER_MARK + if !ok || !strings.HasSuffix(file, "main.go") || line != CALLER_LINE { + panic("bad caller: " + file + ":" + strconv.Itoa(line)) + } +} + +//go:noinline +func checkCallerSkip() { + helperCallerSkip() +} + +//go:noinline +func helperCallerSkip() { + _, file, line, ok := runtime.Caller(1) + if !ok || !strings.HasSuffix(file, "main.go") || line != CALLER_SKIP_LINE { + panic("bad caller skip: " + file + ":" + strconv.Itoa(line)) + } +} + +//go:noinline +func checkFrames() { + var pcs [8]uintptr + n := runtime.Callers(0, pcs[:]) + frames := runtime.CallersFrames(pcs[:n]) + for { + frame, more := frames.Next() + if frame.Function == "main.checkFrames" { + if !strings.HasSuffix(frame.File, "main.go") || frame.Line == 0 { + panic("bad frame") + } + return + } + if !more { + break + } + } + panic("missing frame") +} + +//go:noinline +func checkFuncForPC() { + pc, _, _, ok := runtime.Caller(0) // FUNC_FILELINE_MARK + if !ok { + panic("missing pc") + } + fn := runtime.FuncForPC(pc) + if fn == nil { + panic("missing func") + } + if name := fn.Name(); name != "main.checkFuncForPC" { + panic("bad func: " + name) + } + if entry := fn.Entry(); entry == 0 { + panic("missing func entry") + } + file, line := fn.FileLine(pc) + if !strings.HasSuffix(file, "main.go") || line != FUNC_FILELINE_LINE { + panic("bad func fileline: " + file + ":" + strconv.Itoa(line)) + } +} + +//go:noinline +func 
checkFuncInfoRename() { + pc := renamedPC() + if name := runtime.FuncForPC(pc).Name(); name != "main.renamedPC" { + panic("bad renamed func: " + name) + } +} + +//go:linkname renamedPC main.renamedPCSymbol +//go:noinline +func renamedPC() uintptr { + pc, _, _, ok := runtime.Caller(0) + if !ok { + panic("missing renamed pc") + } + return pc +} + +//go:noinline +func checkRuntimeStack() { + var buf [4096]byte + n := runtime.Stack(buf[:], false) // RUNTIME_STACK_MARK + stack := string(buf[:n]) + if !strings.Contains(stack, "main.checkRuntimeStack") || !strings.Contains(stack, "main.go:RUNTIME_STACK_LINE") { + panic("bad runtime stack: " + stack) + } +} + +//go:noinline +func checkPanicStack() { + defer func() { // DEBUG_STACK_MARK + if recover() == nil { + panic("missing panic") + } + stack := string(debug.Stack()) + if !strings.Contains(stack, "main.checkPanicStack") || !strings.Contains(stack, "main.go:DEBUG_STACK_LINE") { + panic("bad stack: " + stack) + } + }() + s := []int{1, 2, 3} + _ = s[3] +} +` + +func TestRuntimeLineInfoAndStack(t *testing.T) { + source := runtimeLineInfoProbe + source = strings.ReplaceAll(source, "CALLER_LINE", strconv.Itoa(markerLine(source, "func checkCaller()"))) + source = strings.ReplaceAll(source, "CALLER_SKIP_LINE", strconv.Itoa(markerLine(source, "func checkCallerSkip()"))) + source = strings.ReplaceAll(source, "FUNC_FILELINE_LINE", strconv.Itoa(markerLine(source, "func checkFuncForPC()"))) + source = strings.ReplaceAll(source, "RUNTIME_STACK_LINE", strconv.Itoa(markerLine(source, "func checkRuntimeStack()"))) + source = strings.ReplaceAll(source, "DEBUG_STACK_LINE", strconv.Itoa(markerLine(source, "DEBUG_STACK_MARK"))) + + dir := t.TempDir() + file := filepath.Join(dir, "main.go") + if err := os.WriteFile(file, []byte(source), 0644); err != nil { + t.Fatal(err) + } + + repoRoot := findStringConversionRepoRoot(t) + t.Setenv("LLGO_ROOT", repoRoot) + cmd := exec.Command("go", "run", "./cmd/llgo", "run", "-a", file) + cmd.Dir = 
repoRoot + if out, err := cmd.CombinedOutput(); err != nil { + t.Fatalf("llgo lineinfo probe failed: %v\n%s", err, out) + } +} + +func markerLine(source, marker string) int { + line := 1 + for _, part := range strings.SplitAfter(source, "\n") { + if strings.Contains(part, marker) { + return line + } + line++ + } + panic("missing marker " + marker) +} diff --git a/test/goroot/xfail.yaml b/test/goroot/xfail.yaml index d16ea32788..458df947fc 100644 --- a/test/goroot/xfail.yaml +++ b/test/goroot/xfail.yaml @@ -2159,10 +2159,6 @@ xfails: directive: run case: fixedbugs/issue29504.go reason: latest main goroot run failure on darwin/arm64 - - platform: darwin/arm64 - directive: run - case: fixedbugs/issue29735.go - reason: latest main goroot run failure on darwin/arm64 - platform: darwin/arm64 directive: run case: fixedbugs/issue32477.go From 1e15db2a15e3a2989845ddf5f6e50a15d59e813f Mon Sep 17 00:00:00 2001 From: Li Jie Date: Tue, 30 Jun 2026 00:10:52 +0800 Subject: [PATCH 04/59] runtime: add statement line caller frames --- cl/caller_frame_test.go | 368 +++++++++++++++++++++++++ cl/compile.go | 105 ++++++- cl/instr.go | 227 +++++++++++++++ runtime/internal/lib/runtime/extern.go | 15 + runtime/internal/lib/runtime/symtab.go | 12 + runtime/internal/runtime/caller.go | 254 +++++++++++++++++ runtime/internal/runtime/z_rt.go | 1 + test/go/runtime_lineinfo_stack_test.go | 14 +- test/go/runtime_statement_line_test.go | 162 +++++++++++ 9 files changed, 1149 insertions(+), 9 deletions(-) create mode 100644 cl/caller_frame_test.go create mode 100644 runtime/internal/runtime/caller.go create mode 100644 test/go/runtime_statement_line_test.go diff --git a/cl/caller_frame_test.go b/cl/caller_frame_test.go new file mode 100644 index 0000000000..965086fd2d --- /dev/null +++ b/cl/caller_frame_test.go @@ -0,0 +1,368 @@ +//go:build !llgo +// +build !llgo + +package cl + +import ( + "go/ast" + "go/parser" + "go/token" + "go/types" + "strings" + "testing" + + 
"github.com/goplus/gogen/packages" + llssa "github.com/goplus/llgo/ssa" + gossa "golang.org/x/tools/go/ssa" + "golang.org/x/tools/go/ssa/ssautil" +) + +func parseCallerFrameFile(t *testing.T, src string) *ast.File { + t.Helper() + file, err := parser.ParseFile(token.NewFileSet(), "caller_frame.go", src, 0) + if err != nil { + t.Fatal(err) + } + return file +} + +func TestFilesUseRuntimeCaller(t *testing.T) { + tests := []struct { + name string + src string + want bool + }{ + { + name: "runtime selector", + src: `package foo +import "runtime" +func f() { runtime.Caller(0) } +`, + want: true, + }, + { + name: "runtime alias", + src: `package foo +import rt "runtime" +func f() { rt.Callers(0, nil) } +`, + want: true, + }, + { + name: "runtime debug stack", + src: `package foo +import dbg "runtime/debug" +func f() { _ = dbg.Stack() } +`, + want: true, + }, + { + name: "dot import", + src: `package foo +import . "runtime" +func f() { _ = FuncForPC(0) } +`, + want: true, + }, + { + name: "blank import", + src: `package foo +import _ "runtime" +func f() {} +`, + want: false, + }, + { + name: "non caller runtime selector", + src: `package foo +import "runtime" +func f() { _ = runtime.GOOS } +`, + want: false, + }, + { + name: "caller name without runtime import", + src: `package foo +func f() { Caller(0) } +`, + want: false, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + if got := filesUseRuntimeCaller([]*ast.File{parseCallerFrameFile(t, tt.src)}); got != tt.want { + t.Fatalf("filesUseRuntimeCaller() = %v, want %v", got, tt.want) + } + }) + } + + badImport := &ast.File{ + Imports: []*ast.ImportSpec{{ + Path: &ast.BasicLit{Kind: token.STRING, Value: "runtime"}, + }}, + } + if filesUseRuntimeCaller([]*ast.File{badImport}) { + t.Fatal("invalid import literal should not enable caller frame tracking") + } +} + +func buildCallerFrameSSAPackage(t *testing.T, pkgPath, src string) (*gossa.Package, []*ast.File) { + t.Helper() + fset := 
token.NewFileSet() + file, err := parser.ParseFile(fset, "caller_frame_compile.go", src, parser.ParseComments) + if err != nil { + t.Fatal(err) + } + files := []*ast.File{file} + imp := packages.NewImporter(fset) + mode := gossa.SanityCheckFunctions | gossa.InstantiateGenerics + ssapkg, _, err := ssautil.BuildPackage( + &types.Config{Importer: imp}, + fset, + types.NewPackage(pkgPath, file.Name.Name), + files, + mode, + ) + if err != nil { + t.Fatal(err) + } + return ssapkg, files +} + +func TestRuntimeCallerPackageDetection(t *testing.T) { + ssapkg, _ := buildCallerFrameSSAPackage(t, "example.com/foo", `package foo +import "runtime" +import "runtime/debug" + +func direct() { runtime.Caller(0) } +func stack() { _ = debug.Stack() } +func anonOnly() { func() { runtime.FuncForPC(0) }() } +func plain() {} +`) + if !packageUsesRuntimeCaller(ssapkg) { + t.Fatal("package should report runtime caller usage") + } + if !fnUsesRuntimeCaller(ssapkg.Func("direct")) { + t.Fatal("direct runtime.Caller use should be detected") + } + if !fnUsesRuntimeCaller(ssapkg.Func("stack")) { + t.Fatal("runtime/debug.Stack use should be detected") + } + if !fnUsesRuntimeCaller(ssapkg.Func("anonOnly")) { + t.Fatal("runtime caller use in anonymous functions should be detected") + } + if fnUsesRuntimeCaller(ssapkg.Func("plain")) { + t.Fatal("plain function should not report runtime caller usage") + } + + for _, name := range []string{"Caller", "Callers", "CallersFrames", "FuncForPC", "Stack"} { + if !isRuntimeCallerName(name) { + t.Fatalf("%s should be a runtime caller metadata function", name) + } + } + if isRuntimeCallerName("Version") { + t.Fatal("Version should not be a runtime caller metadata function") + } + + rtpkg, _ := buildCallerFrameSSAPackage(t, "github.com/goplus/llgo/runtime/internal/lib/runtime", `package runtime +func Caller(skip int) (uintptr, string, int, bool) { return 0, "", 0, false } +func FuncForPC(pc uintptr) uintptr { return 0 } +`) + if 
!isRuntimeCallerFunc(rtpkg.Func("Caller")) || !isRuntimeCallerLookupFunc(rtpkg.Func("Caller")) { + t.Fatal("LLGo runtime lib Caller should be treated as runtime.Caller") + } + if !isRuntimeCallerFunc(rtpkg.Func("FuncForPC")) { + t.Fatal("LLGo runtime lib FuncForPC should be treated as runtime metadata use") + } + if isRuntimeCallerLookupFunc(rtpkg.Func("FuncForPC")) { + t.Fatal("FuncForPC should not consume caller lookup tokens") + } +} + +func TestCallerFrameTrackingEligibility(t *testing.T) { + if (&context{}).shouldTrackCallerFrames() { + t.Fatal("missing compiler state should not track caller frames") + } + var nilContext *context + if nilContext.shouldTrackCallerFrames() { + t.Fatal("nil context should not track caller frames") + } + + tests := []struct { + name string + pkgPath string + track bool + targetName string + goarch string + want bool + }{ + {name: "enabled user package", pkgPath: "example.com/foo", track: true, want: true}, + {name: "disabled flag", pkgPath: "example.com/foo", want: false}, + {name: "named target", pkgPath: "example.com/foo", track: true, targetName: "esp32", want: false}, + {name: "wasm", pkgPath: "example.com/foo", track: true, goarch: "wasm", want: false}, + {name: "stdlib", pkgPath: "fmt", track: true, want: false}, + {name: "runtime", pkgPath: "runtime", track: true, want: false}, + {name: "llgo runtime", pkgPath: llssa.PkgRuntime, track: true, want: false}, + {name: "llgo runtime internal", pkgPath: "github.com/goplus/llgo/runtime/internal/foo", track: true, want: false}, + {name: "command line package", pkgPath: "command-line-arguments", track: true, want: true}, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + prog := llssa.NewProgram(nil) + if tt.targetName != "" { + prog.Target().Target = tt.targetName + } + if tt.goarch != "" { + prog.Target().GOARCH = tt.goarch + } + pkg := prog.NewPackage("foo", tt.pkgPath) + fn := pkg.NewFunc("f", llssa.NoArgsNoRet, llssa.InGo) + ctx := &context{prog: prog, 
pkg: pkg, fn: fn, trackCallerFrames: tt.track} + if got := ctx.shouldTrackCallerFrames(); got != tt.want { + t.Fatalf("shouldTrackCallerFrames() = %v, want %v", got, tt.want) + } + }) + } + + if canTrackCallerFramesForPackage("net/http") { + t.Fatal("stdlib paths without dots should not track caller frames") + } +} + +func TestRuntimeFrameNameNormalization(t *testing.T) { + tests := []struct { + in string + want string + }{ + {in: "command-line-arguments.main", want: "main.main"}, + {in: "example.com/foo.f$1", want: "example.com/foo.f.func1"}, + {in: "example.com/foo.f", want: "example.com/foo.f"}, + {in: "example.com/foo.f$", want: "example.com/foo.f$"}, + {in: "example.com/foo.f$inner", want: "example.com/foo.f$inner"}, + } + for _, tt := range tests { + if got := runtimeFrameName(tt.in); got != tt.want { + t.Fatalf("runtimeFrameName(%q) = %q, want %q", tt.in, got, tt.want) + } + } + + if got := (*context)(nil).runtimeCallerFrameName(); got != "" { + t.Fatalf("nil context runtimeCallerFrameName() = %q, want empty", got) + } + if got := (&context{}).runtimeCallerFrameName(); got != "" { + t.Fatalf("empty context runtimeCallerFrameName() = %q, want empty", got) + } + prog := newLLSSAProg(t) + pkg := prog.NewPackage("main", "command-line-arguments") + sig := types.NewSignatureType(nil, nil, nil, nil, nil, false) + ctx := &context{fn: pkg.NewFuncEx("command-line-arguments.f$1", sig, llssa.InGo, false, false)} + if got, want := ctx.runtimeCallerFrameName(), "main.f.func1"; got != want { + t.Fatalf("fallback runtimeCallerFrameName() = %q, want %q", got, want) + } +} + +func TestCompileRuntimeCallerFrameInstrumentation(t *testing.T) { + ssapkg, files := buildCallerFrameSSAPackage(t, "example.com/foo", `package foo +import "runtime/debug" + +func f() { + _ = debug.Stack() +} +`) + prog := newLLSSAProg(t) + pkg, err := NewPackage(prog, ssapkg, files) + if err != nil { + t.Fatal(err) + } + ir := pkg.Module().String() + for _, want := range []string{ + "PushCallerFrame", + 
"SetCallerLookupLine", + "PopCallerFrame", + `c"example.com/foo.f`, + } { + if !strings.Contains(ir, want) { + t.Fatalf("compiled caller-frame IR missing %q:\n%s", want, ir) + } + } +} + +func TestCompileRuntimeCallerFrameUsesGoNameForLinkname(t *testing.T) { + ssapkg, files := buildCallerFrameSSAPackage(t, "command-line-arguments", `package main +import "runtime" + +func renamedPC() uintptr { + pc, _, _, _ := runtime.Caller(0) + return pc +} +`) + prog := newLLSSAProg(t) + prog.SetLinkname("command-line-arguments.renamedPC", "main.renamedPCSymbol") + pkg, err := NewPackage(prog, ssapkg, files) + if err != nil { + t.Fatal(err) + } + ir := pkg.Module().String() + if !strings.Contains(ir, `c"main.renamedPC"`) { + t.Fatalf("compiled caller-frame IR missing source function name:\n%s", ir) + } + if strings.Contains(ir, `c"main.renamedPCSymbol"`) { + t.Fatalf("compiled caller-frame IR used linkname target as runtime function name:\n%s", ir) + } +} + +func TestCompileRuntimeCallerFrameInstrumentationSkipped(t *testing.T) { + ssapkg, files := buildCallerFrameSSAPackage(t, "example.com/foo", `package foo +import "runtime" + +func f() { + runtime.Caller(0) +} +`) + prog := newLLSSAProg(t) + prog.Target().Target = "esp32" + pkg, err := NewPackage(prog, ssapkg, files) + if err != nil { + t.Fatal(err) + } + if ir := pkg.Module().String(); strings.Contains(ir, "PushCallerFrame") { + t.Fatalf("target builds should not emit caller-frame tracking:\n%s", ir) + } + + ssapkg, files = buildCallerFrameSSAPackage(t, "example.com/foo", `package foo +func f() {} +`) + prog = newLLSSAProg(t) + pkg, err = NewPackage(prog, ssapkg, files) + if err != nil { + t.Fatal(err) + } + if ir := pkg.Module().String(); strings.Contains(ir, "PushCallerFrame") || strings.Contains(ir, "SetCallerLine") { + t.Fatalf("packages without runtime stack APIs should not emit caller-frame tracking:\n%s", ir) + } +} + +func TestCompileRuntimeCallerLookupTokenOnlyForRuntimeAPIs(t *testing.T) { + ssapkg, files := 
buildCallerFrameSSAPackage(t, "example.com/foo", `package foo +import "runtime" + +func helper() {} + +func f() { + helper() + runtime.Caller(0) +} +`) + prog := newLLSSAProg(t) + pkg, err := NewPackage(prog, ssapkg, files) + if err != nil { + t.Fatal(err) + } + ir := pkg.Module().String() + if !strings.Contains(ir, "SetCallerLookupLine") { + t.Fatalf("runtime.Caller should enable caller lookup:\n%s", ir) + } + if !strings.Contains(ir, "SetCallerLine") { + t.Fatalf("ordinary calls in an instrumented package should only update the current line:\n%s", ir) + } +} diff --git a/cl/compile.go b/cl/compile.go index fe1dbfc8b1..27282c7461 100644 --- a/cl/compile.go +++ b/cl/compile.go @@ -199,6 +199,9 @@ type context struct { rewrites map[string]string embedMap goembed.VarMap embedInits []embedInit + + trackCallerFrames bool + callerFrameMark llssa.Expr } func (p *context) rewriteValue(name string) (string, bool) { @@ -214,6 +217,79 @@ func (p *context) rewriteValue(name string) (string, bool) { return val, ok } +func filesUseRuntimeCaller(files []*ast.File) bool { + for _, file := range files { + imports := make(map[string]string) + dotImports := make(map[string]bool) + for _, imp := range file.Imports { + path, err := strconv.Unquote(imp.Path.Value) + if err != nil { + continue + } + switch path { + case "runtime", "runtime/debug": + default: + continue + } + name := path[strings.LastIndex(path, "/")+1:] + if imp.Name != nil { + switch imp.Name.Name { + case ".": + dotImports[path] = true + continue + case "_": + continue + default: + name = imp.Name.Name + } + } + imports[name] = path + } + if len(imports) == 0 && len(dotImports) == 0 { + continue + } + found := false + ast.Inspect(file, func(n ast.Node) bool { + if found { + return false + } + switch n := n.(type) { + case *ast.SelectorExpr: + ident, ok := n.X.(*ast.Ident) + if !ok { + return true + } + if runtimeCallerSelector(imports[ident.Name], n.Sel.Name) { + found = true + return false + } + case *ast.Ident: + if 
(dotImports["runtime"] && isRuntimeCallerName(n.Name)) || + (dotImports["runtime/debug"] && n.Name == "Stack") { + found = true + return false + } + } + return true + }) + if found { + return true + } + } + return false +} + +func runtimeCallerSelector(path, name string) bool { + switch path { + case "runtime": + return isRuntimeCallerName(name) + case "runtime/debug": + return name == "Stack" + default: + return false + } +} + // isStringPtrType checks if typ is a pointer to the basic string type (*string). // This is used to validate that -ldflags -X can only rewrite variables of type *string, // not derived string types like "type T string". @@ -513,12 +589,13 @@ func (p *context) compileFuncDecl(pkg llssa.Package, f *ssa.Function) (llssa.Fun dbgEnabled := enableDbg && (f == nil || f.Origin() == nil) dbgSymsEnabled := enableDbgSyms && (f == nil || f.Origin() == nil) p.inits = append(p.inits, func() { - oldFn, oldGoFn, oldMethodNilDerefChecks := p.fn, p.goFn, p.methodNilDerefChecks + oldFn, oldGoFn, oldMethodNilDerefChecks, oldCallerFrameMark := p.fn, p.goFn, p.methodNilDerefChecks, p.callerFrameMark p.fn = fn p.goFn = f + p.callerFrameMark = llssa.Nil p.state = state // restore pkgState when compiling funcBody defer func() { - p.fn, p.goFn, p.methodNilDerefChecks = oldFn, oldGoFn, oldMethodNilDerefChecks + p.fn, p.goFn, p.methodNilDerefChecks, p.callerFrameMark = oldFn, oldGoFn, oldMethodNilDerefChecks, oldCallerFrameMark }() p.phis = nil if dbgSymsEnabled { @@ -669,6 +746,9 @@ func (p *context) compileBlock(b llssa.Builder, block *ssa.BasicBlock, n int, do var instrs = block.Instrs[n:] var ret = fn.Block(block.Index) b.SetBlock(ret) + if block.Index == 0 && p.shouldTrackCallerFrames() { + p.pushCallerFrame(b, block.Parent()) + } if block.Index == 0 && enableCallTracing && !strings.HasPrefix(fn.Name(), "github.com/goplus/llgo/runtime/internal/runtime.Print") { b.Printf("call " + fn.Name() + "\n\x00") } @@ -1057,6 +1137,7 @@ func (p *context) 
compileInstrOrValue(b llssa.Builder, iv instrOrValue, asValue if t := p.type_(v.Type(), llssa.InGo); t.RawType() != nil { if p.isLargeNonPointerValue(t) { x := p.compileValue(b, v.X) + p.setCallerLine(b, v.Pos()) p.assertNilDerefBase(b, v.X) b.AssertNilDeref(x) return @@ -1070,6 +1151,7 @@ func (p *context) compileInstrOrValue(b llssa.Builder, iv instrOrValue, asValue // Zero-length slice-to-array conversions can leave only // an unused slice deref; preserve its required nil check. x := p.compileValue(b, v.X) + p.setCallerLine(b, v.Pos()) p.assertNilDerefBase(b, v.X) b.AssertNilDeref(x) return @@ -1100,6 +1182,9 @@ func (p *context) compileInstrOrValue(b llssa.Builder, iv instrOrValue, asValue } } x := p.compileValue(b, v.X) + if v.Op != token.ARROW { + p.setCallerLine(b, v.Pos()) + } if shouldAssertDirectNilDeref(v) { b.AssertNilDeref(x) } @@ -1135,6 +1220,7 @@ func (p *context) compileInstrOrValue(b llssa.Builder, iv instrOrValue, asValue ret = b.Convert(p.type_(t, llssa.InGo), x) case *ssa.FieldAddr: x := p.compileValue(b, v.X) + p.setCallerLine(b, v.Pos()) if p.isAddressOfFieldAddr(v) { b.AssertNilDeref(x) } @@ -1156,10 +1242,12 @@ func (p *context) compileInstrOrValue(b llssa.Builder, iv instrOrValue, asValue } x := p.compileValue(b, vx) idx := p.compileValue(b, v.Index) + p.setCallerLine(b, v.Pos()) ret = b.IndexAddr(x, idx) case *ssa.Index: x := p.compileValue(b, v.X) idx := p.compileValue(b, v.Index) + p.setCallerLine(b, v.Pos()) ret = b.Index(x, idx, func() (addr llssa.Expr, zero bool) { switch n := v.X.(type) { case *ssa.Const: @@ -1193,6 +1281,7 @@ func (p *context) compileInstrOrValue(b llssa.Builder, iv instrOrValue, asValue if v.Max != nil { max = p.compileValue(b, v.Max) } + p.setCallerLine(b, v.Pos()) ret = b.Slice(x, low, high, max) ret.Type = p.type_(v.Type(), llssa.InGo) case *ssa.MakeInterface: @@ -1249,6 +1338,7 @@ func (p *context) compileInstrOrValue(b llssa.Builder, iv instrOrValue, asValue case *ssa.TypeAssert: x := p.compileValue(b, v.X) t 
:= p.type_(v.AssertedType, llssa.InGo) + p.setCallerLine(b, v.Pos()) ret = b.TypeAssert(x, t, v.CommaOk) case *ssa.Extract: x := p.compileValue(b, v.Tuple) @@ -1289,6 +1379,7 @@ func (p *context) compileInstrOrValue(b llssa.Builder, iv instrOrValue, asValue case *ssa.SliceToArrayPointer: t := p.type_(v.Type(), llssa.InGo) x := p.compileValue(b, v.X) + p.setCallerLine(b, v.Pos()) ret = b.SliceToArrayPointer(x, t) default: panic(fmt.Sprintf("compileInstrAndValue: unknown instr - %T\n", iv)) @@ -1423,8 +1514,12 @@ func (p *context) compileInstr(b llssa.Builder, instr ssa.Instruction) { } } if p.returnNeedsImplicitRunDefers(v) { + p.setCallerLine(b, v.Pos()) b.RunDefers() } + if p.shouldTrackCallerFrames() { + p.popCallerFrame(b) + } b.Return(results...) case *ssa.If: fn := p.fn @@ -1437,6 +1532,7 @@ func (p *context) compileInstr(b llssa.Builder, instr ssa.Instruction) { m := p.compileValue(b, v.Map) key := p.compileValue(b, v.Key) val := p.compileValue(b, v.Value) + p.setCallerLine(b, v.Pos()) b.MapUpdate(m, key, val) case *ssa.Defer: if v.DeferStack != nil { @@ -1447,13 +1543,16 @@ func (p *context) compileInstr(b llssa.Builder, instr ssa.Instruction) { case *ssa.Go: p.call(b, llssa.Go, &v.Call) case *ssa.RunDefers: + p.setCallerLine(b, v.Pos()) b.RunDefers() case *ssa.Panic: arg := p.compileValue(b, v.X) + p.setCallerLine(b, v.Pos()) b.Panic(arg) case *ssa.Send: ch := p.compileValue(b, v.Chan) x := p.compileValue(b, v.X) + p.setCallerLine(b, v.Pos()) b.Send(ch, x) case *ssa.DebugRef: if enableDbgSyms && v.Parent().Origin() == nil { @@ -1769,6 +1868,8 @@ func newPackageEx(prog llssa.Program, patches Patches, rewrites map[string]strin }, cgoSymbols: make([]string, 0, 128), rewrites: rewrites, + + trackCallerFrames: filesUseRuntimeCaller(files) || packageUsesRuntimeCaller(pkg), } if embedMap != nil { ctx.embedMap = *embedMap diff --git a/cl/instr.go b/cl/instr.go index b7fc52abd3..0c6a2d69a6 100644 --- a/cl/instr.go +++ b/cl/instr.go @@ -853,6 +853,232 @@ func (p 
*context) sourceLine(filename string, line int) (string, bool) { return lines[line-1], true } +func (p *context) shouldTrackCallerFrames() bool { + if p == nil || p.pkg == nil || p.fn == nil || !p.trackCallerFrames { + return false + } + if target := p.prog.Target(); target != nil && (target.Target != "" || target.GOARCH == "wasm") { + return false + } + return canTrackCallerFramesForPackage(p.pkg.Path()) +} + +func canTrackCallerFramesForPackage(pkgPath string) bool { + return pkgPath != llssa.PkgRuntime && + pkgPath != "runtime" && + !isStandardLibraryPackage(pkgPath) && + !strings.HasPrefix(pkgPath, "github.com/goplus/llgo/runtime/internal/") +} + +func isStandardLibraryPackage(pkgPath string) bool { + return pkgPath != "command-line-arguments" && !strings.Contains(pkgPath, ".") +} + +func packageUsesRuntimeCaller(pkg *ssa.Package) bool { + if pkg == nil { + return false + } + for _, member := range pkg.Members { + fn, ok := member.(*ssa.Function) + if ok && fnUsesRuntimeCaller(fn) { + return true + } + } + return false +} + +func fnUsesRuntimeCaller(fn *ssa.Function) bool { + if fn == nil { + return false + } + for _, block := range fn.Blocks { + for _, instr := range block.Instrs { + call, ok := instr.(ssa.CallInstruction) + if !ok { + continue + } + if isRuntimeCallerFunc(call.Common().StaticCallee()) { + return true + } + } + } + for _, anon := range fn.AnonFuncs { + if fnUsesRuntimeCaller(anon) { + return true + } + } + return false +} + +func isRuntimeCallerFunc(fn *ssa.Function) bool { + if fn == nil || fn.Pkg == nil || fn.Pkg.Pkg == nil { + return false + } + switch fn.Pkg.Pkg.Path() { + case "runtime", "github.com/goplus/llgo/runtime/internal/lib/runtime": + return isRuntimeCallerName(fn.Name()) + case "runtime/debug": + return fn.Name() == "Stack" + default: + return false + } +} + +func isRuntimeCallerLookupFunc(fn *ssa.Function) bool { + if fn == nil || fn.Pkg == nil || fn.Pkg.Pkg == nil { + return false + } + switch fn.Pkg.Pkg.Path() { + case 
"runtime", "github.com/goplus/llgo/runtime/internal/lib/runtime": + switch fn.Name() { + case "Caller", "Callers", "Stack": + return true + } + case "runtime/debug": + return fn.Name() == "Stack" + } + return false +} + +func isRuntimeCallerName(name string) bool { + switch name { + case "Caller", "Callers", "CallersFrames", "FuncForPC", "Stack": + return true + default: + return false + } +} + +func (p *context) pushCallerFrame(b llssa.Builder, fn *ssa.Function) { + if fn == nil { + return + } + pos := p.fset.Position(fn.Pos()) + entry := b.Convert(p.prog.Uintptr(), p.fn.Expr) + p.callerFrameMark = b.Call( + p.runtimeFunc("PushCallerFrame", pushCallerFrameSig()), + entry, + b.Str(p.runtimeCallerFrameName()), + b.Str(pos.Filename), + p.prog.IntVal(uint64(pos.Line), p.prog.Int()), + ) +} + +func (p *context) runtimeCallerFrameName() string { + if p == nil { + return "" + } + if p.goFn != nil && p.goFn.Pkg != nil && p.goFn.Pkg.Pkg != nil { + return runtimeFrameName(funcName(p.goFn.Pkg.Pkg, p.goFn, false)) + } + if p.fn != nil { + return runtimeFrameName(p.fn.Name()) + } + return "" +} + +func (p *context) setCallerLine(b llssa.Builder, pos token.Pos) { + if !p.shouldTrackCallerFrames() { + return + } + line := p.fset.Position(pos).Line + p.setCallerLineNumber(b, line) +} + +func (p *context) setCallerLineForCall(b llssa.Builder, call *ssa.CallCommon) { + if !p.shouldTrackCallerFrames() { + return + } + line := p.fset.Position(call.Pos()).Line + if line <= 0 { + return + } + fn := "SetCallerLine" + sig := setCallerLineSig() + if isRuntimeCallerLookupFunc(call.StaticCallee()) { + fn = "SetCallerLookupLine" + sig = setCallerLookupLineSig() + } + b.Call(p.runtimeFunc(fn, sig), p.prog.IntVal(uint64(line), p.prog.Int())) +} + +func (p *context) setCallerLineNumber(b llssa.Builder, line int) { + if line <= 0 { + return + } + b.Call(p.runtimeFunc("SetCallerLine", setCallerLineSig()), p.prog.IntVal(uint64(line), p.prog.Int())) +} + +func (p *context) popCallerFrame(b 
llssa.Builder) { + if p.callerFrameMark.IsNil() { + return + } + b.Call(p.runtimeFunc("PopCallerFrame", popCallerFrameSig()), p.callerFrameMark) +} + +func (p *context) runtimeFunc(name string, sig *types.Signature) llssa.Expr { + p.pkg.NeedRuntime = true + fullName := llssa.PkgRuntime + "." + name + if fn := p.pkg.FuncOf(fullName); fn != nil { + return fn.Expr + } + return p.pkg.NewFuncEx(fullName, sig, llssa.InGo, false, false).Expr +} + +func pushCallerFrameSig() *types.Signature { + return types.NewSignatureType(nil, nil, nil, + types.NewTuple( + types.NewVar(token.NoPos, nil, "entry", types.Typ[types.Uintptr]), + types.NewVar(token.NoPos, nil, "name", types.Typ[types.String]), + types.NewVar(token.NoPos, nil, "file", types.Typ[types.String]), + types.NewVar(token.NoPos, nil, "startLine", types.Typ[types.Int]), + ), + types.NewTuple(types.NewVar(token.NoPos, nil, "", types.Typ[types.Int])), + false, + ) +} + +func setCallerLineSig() *types.Signature { + return types.NewSignatureType(nil, nil, nil, + types.NewTuple(types.NewVar(token.NoPos, nil, "line", types.Typ[types.Int])), + nil, + false, + ) +} + +func setCallerLookupLineSig() *types.Signature { + return setCallerLineSig() +} + +func popCallerFrameSig() *types.Signature { + return types.NewSignatureType(nil, nil, nil, + types.NewTuple(types.NewVar(token.NoPos, nil, "mark", types.Typ[types.Int])), + nil, + false, + ) +} + +func runtimeFrameName(name string) string { + const commandLineArguments = "command-line-arguments." + if strings.HasPrefix(name, commandLineArguments) { + name = "main." 
+ name[len(commandLineArguments):] + } + return normalizeRuntimeAnonFuncName(name) +} + +func normalizeRuntimeAnonFuncName(name string) string { + dollar := strings.LastIndexByte(name, '$') + if dollar < 0 || dollar == len(name)-1 { + return name + } + for i := dollar + 1; i < len(name); i++ { + if name[i] < '0' || name[i] > '9' { + return name + } + } + return name[:dollar] + ".func" + name[dollar+1:] +} + // ----------------------------------------------------------------------------- type explicitDeferStack struct { @@ -1049,6 +1275,7 @@ func collectMethodNilDerefChecks(fn *ssa.Function) map[*ssa.UnOp]none { } func (p *context) callEx(b llssa.Builder, act llssa.DoAction, call *ssa.CallCommon, ds *explicitDeferStack) (ret llssa.Expr) { + p.setCallerLineForCall(b, call) cv := call.Value if mthd := call.Method; mthd != nil { reflectCheck := p.reflectTypeMethodCheck(call, mthd) diff --git a/runtime/internal/lib/runtime/extern.go b/runtime/internal/lib/runtime/extern.go index 377c973876..d6835b794f 100644 --- a/runtime/internal/lib/runtime/extern.go +++ b/runtime/internal/lib/runtime/extern.go @@ -6,9 +6,21 @@ package runtime import ( clitedebug "github.com/goplus/llgo/runtime/internal/clite/debug" + rtdebug "github.com/goplus/llgo/runtime/internal/runtime" ) func Caller(skip int) (pc uintptr, file string, line int, ok bool) { + if frame, ok := rtdebug.Caller(skip); ok { + file = frame.File + line = frame.Line + if file == "" { + file = "???" 
+ } + if line == 0 { + line = 1 + } + return frame.PC, file, line, true + } var pcs [1]uintptr if Callers(skip+2, pcs[:]) < 1 { return 0, "", 0, false @@ -25,6 +37,9 @@ func Caller(skip int) (pc uintptr, file string, line int, ok bool) { } func Callers(skip int, pc []uintptr) int { + if n := rtdebug.Callers(skip, pc); n > 0 { + return n + } return callers(skip+1, pc) } diff --git a/runtime/internal/lib/runtime/symtab.go b/runtime/internal/lib/runtime/symtab.go index 84e04aa943..bd0a616018 100644 --- a/runtime/internal/lib/runtime/symtab.go +++ b/runtime/internal/lib/runtime/symtab.go @@ -9,6 +9,7 @@ import ( c "github.com/goplus/llgo/runtime/internal/clite" clitedebug "github.com/goplus/llgo/runtime/internal/clite/debug" + rtdebug "github.com/goplus/llgo/runtime/internal/runtime" ) // Frames may be used to get function/file/line information for a @@ -335,6 +336,17 @@ func addrInfoSymbol(pc uintptr) pcSymbol { } func frameSymbol(pc uintptr) pcSymbol { + if frame, ok := rtdebug.FrameForPC(pc); ok { + return pcSymbol{ + pc: pc, + entry: frame.Entry, + function: frame.Function, + file: frame.File, + line: frame.Line, + startLine: frame.StartLine, + ok: true, + } + } sym := addrInfoSymbol(pc) if pc == 0 { return sym diff --git a/runtime/internal/runtime/caller.go b/runtime/internal/runtime/caller.go new file mode 100644 index 0000000000..ba2a212fed --- /dev/null +++ b/runtime/internal/runtime/caller.go @@ -0,0 +1,254 @@ +/* + * Copyright (c) 2026 The XGo Authors (xgo.dev). All rights reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package runtime + +import ( + "unsafe" + + "github.com/goplus/llgo/runtime/internal/clite/tls" +) + +type CallerFrame struct { + PC uintptr + Entry uintptr + Function string + File string + Line int + StartLine int +} + +const ( + callerPCMask = uintptr(3) + callerPCValue = uintptr(1) + callersPCValue = uintptr(3) + callerPCRingSize = 1024 +) + +type callerPCStore struct { + next uintptr + frames [callerPCRingSize]CallerFrame +} + +var ( + callerFrameTLS = tls.Alloc[[]CallerFrame](nil) + callerPCStoreTLS = tls.Alloc[*callerPCStore](nil) + callerLookupTLS = tls.Alloc[bool](nil) + panicCallerFrameTLS = tls.Alloc[[]CallerFrame](nil) +) + +var ( + runtimeCallersFrame = CallerFrame{Function: "runtime.Callers"} + runtimeMainFrame = CallerFrame{Function: "runtime.main"} + runtimeGoexitFrame = CallerFrame{Function: "runtime.goexit"} +) + +func PushCallerFrame(entry uintptr, name, file string, startLine int) int { + frames := callerFrameTLS.Get() + mark := len(frames) + frames = append(frames, CallerFrame{ + PC: entry, + Entry: entry, + Function: name, + File: file, + Line: startLine, + StartLine: startLine, + }) + callerFrameTLS.Set(frames) + return mark +} + +func SetCallerLine(line int) { + frames := callerFrameTLS.Get() + if line <= 0 || len(frames) == 0 { + return + } + frames[len(frames)-1].Line = line + callerFrameTLS.Set(frames) +} + +func SetCallerLookupLine(line int) { + SetCallerLine(line) + callerLookupTLS.Set(true) +} + +func PopCallerFrame(mark int) { + frames := callerFrameTLS.Get() + oldLen := len(frames) + if mark < 0 || mark > oldLen { + return + } + var zero CallerFrame + for i := mark; i < oldLen; i++ { + frames[i] = zero + } + callerFrameTLS.Set(frames[:mark]) + + panicFrames := panicCallerFrameTLS.Get() + if len(panicFrames) > 0 && oldLen >= len(panicFrames) && mark <= len(panicFrames) { + for i := range panicFrames { + panicFrames[i] = 
zero + } + panicCallerFrameTLS.Clear() + } +} + +func SavePanicCallerFrames() { + frames := callerFrameTLS.Get() + if len(frames) == 0 { + panicCallerFrameTLS.Clear() + return + } + panicFrames := panicCallerFrameTLS.Get() + if cap(panicFrames) < len(frames) { + panicFrames = make([]CallerFrame, len(frames)) + } else { + panicFrames = panicFrames[:len(frames)] + } + copy(panicFrames, frames) + panicCallerFrameTLS.Set(panicFrames) +} + +func Caller(skip int) (CallerFrame, bool) { + if !takeCallerLookup() { + return CallerFrame{}, false + } + if skip < 0 { + return CallerFrame{}, false + } + frames := callerFrameTLS.Get() + panicFrames := panicCallerFrameTLS.Get() + if len(frames) == 0 { + if skip < len(panicFrames) { + return captureFrame(panicFrames[len(panicFrames)-1-skip], callerPCValue), true + } + return CallerFrame{}, false + } + if skip < len(frames) { + return captureFrame(frames[len(frames)-1-skip], callerPCValue), true + } + if len(panicFrames) > len(frames) { + idx := len(panicFrames) - 1 - skip + if idx >= 0 { + return captureFrame(panicFrames[idx], callerPCValue), true + } + } + switch skip - len(frames) { + case 0: + return captureFrame(runtimeMainFrame, callerPCValue), true + case 1: + return captureFrame(runtimeGoexitFrame, callerPCValue), true + default: + return CallerFrame{}, false + } +} + +func Callers(skip int, pcs []uintptr) int { + if !takeCallerLookup() { + return 0 + } + if skip < 0 { + skip = 0 + } + frames := callerFrameTLS.Get() + if len(frames) == 0 { + frames = panicCallerFrameTLS.Get() + } + if len(frames) == 0 { + return 0 + } + n := 0 + add := func(frame CallerFrame) bool { + if skip > 0 { + skip-- + return true + } + if n >= len(pcs) { + return false + } + pcs[n] = captureFrame(frame, callersPCValue).PC + n++ + return true + } + if !add(runtimeCallersFrame) { + return n + } + for i := len(frames) - 1; i >= 0; i-- { + if !add(frames[i]) { + return n + } + } + _ = add(runtimeMainFrame) + _ = add(runtimeGoexitFrame) + return n +} + 
+func takeCallerLookup() bool { + if !callerLookupTLS.Get() { + return false + } + callerLookupTLS.Set(false) + return true +} + +func FrameForPC(pc uintptr) (CallerFrame, bool) { + if pc&callerPCMask == 0 { + return CallerFrame{}, false + } + store := callerPCStoreTLS.Get() + if store == nil { + return CallerFrame{}, false + } + addr := pc &^ callerPCMask + if !store.contains(addr) { + return CallerFrame{}, false + } + frame := *(*CallerFrame)(unsafe.Pointer(addr)) + return frame, true +} + +func callerPCStoreForThread() *callerPCStore { + store := callerPCStoreTLS.Get() + if store == nil { + store = new(callerPCStore) + callerPCStoreTLS.Set(store) + } + return store +} + +func captureFrame(frame CallerFrame, pcValue uintptr) CallerFrame { + store := callerPCStoreForThread() + idx := store.next & (callerPCRingSize - 1) + store.next++ + store.frames[idx] = frame + rec := &store.frames[idx] + pc := uintptr(unsafe.Pointer(rec)) | pcValue + rec.PC = pc + if rec.Entry == 0 { + rec.Entry = pc + } + return *rec +} + +func (s *callerPCStore) contains(addr uintptr) bool { + start := uintptr(unsafe.Pointer(&s.frames[0])) + size := unsafe.Sizeof(s.frames) + end := start + size + if addr < start || addr >= end { + return false + } + return (addr-start)%unsafe.Sizeof(s.frames[0]) == 0 +} diff --git a/runtime/internal/runtime/z_rt.go b/runtime/internal/runtime/z_rt.go index 3b17c951e1..4cd79f22ac 100644 --- a/runtime/internal/runtime/z_rt.go +++ b/runtime/internal/runtime/z_rt.go @@ -49,6 +49,7 @@ func Recover() (ret any) { // Panic panics with a value. 
func Panic(v any) { + SavePanicCallerFrames() ptr := c.Malloc(unsafe.Sizeof(v)) *(*any)(ptr) = v excepKey.Set(ptr) diff --git a/test/go/runtime_lineinfo_stack_test.go b/test/go/runtime_lineinfo_stack_test.go index 4d46601112..e9c9bf7334 100644 --- a/test/go/runtime_lineinfo_stack_test.go +++ b/test/go/runtime_lineinfo_stack_test.go @@ -55,7 +55,7 @@ func checkCaller() { //go:noinline func checkCallerSkip() { - helperCallerSkip() + helperCallerSkip() // CALLER_SKIP_MARK } //go:noinline @@ -142,7 +142,7 @@ func checkPanicStack() { if recover() == nil { panic("missing panic") } - stack := string(debug.Stack()) + stack := string(debug.Stack()) // DEBUG_STACK_CALL_MARK if !strings.Contains(stack, "main.checkPanicStack") || !strings.Contains(stack, "main.go:DEBUG_STACK_LINE") { panic("bad stack: " + stack) } @@ -154,11 +154,11 @@ func checkPanicStack() { func TestRuntimeLineInfoAndStack(t *testing.T) { source := runtimeLineInfoProbe - source = strings.ReplaceAll(source, "CALLER_LINE", strconv.Itoa(markerLine(source, "func checkCaller()"))) - source = strings.ReplaceAll(source, "CALLER_SKIP_LINE", strconv.Itoa(markerLine(source, "func checkCallerSkip()"))) - source = strings.ReplaceAll(source, "FUNC_FILELINE_LINE", strconv.Itoa(markerLine(source, "func checkFuncForPC()"))) - source = strings.ReplaceAll(source, "RUNTIME_STACK_LINE", strconv.Itoa(markerLine(source, "func checkRuntimeStack()"))) - source = strings.ReplaceAll(source, "DEBUG_STACK_LINE", strconv.Itoa(markerLine(source, "DEBUG_STACK_MARK"))) + source = strings.ReplaceAll(source, "CALLER_LINE", strconv.Itoa(markerLine(source, "CALLER_MARK"))) + source = strings.ReplaceAll(source, "CALLER_SKIP_LINE", strconv.Itoa(markerLine(source, "CALLER_SKIP_MARK"))) + source = strings.ReplaceAll(source, "FUNC_FILELINE_LINE", strconv.Itoa(markerLine(source, "FUNC_FILELINE_MARK"))) + source = strings.ReplaceAll(source, "RUNTIME_STACK_LINE", strconv.Itoa(markerLine(source, "RUNTIME_STACK_MARK"))) + source = 
strings.ReplaceAll(source, "DEBUG_STACK_LINE", strconv.Itoa(markerLine(source, "DEBUG_STACK_CALL_MARK"))) dir := t.TempDir() file := filepath.Join(dir, "main.go") diff --git a/test/go/runtime_statement_line_test.go b/test/go/runtime_statement_line_test.go new file mode 100644 index 0000000000..d534b36dc8 --- /dev/null +++ b/test/go/runtime_statement_line_test.go @@ -0,0 +1,162 @@ +/* + * Copyright (c) 2026 The XGo Authors (xgo.dev). All rights reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package gotest + +import ( + "os" + "os/exec" + "path/filepath" + "strconv" + "strings" + "testing" +) + +const runtimeStatementLineProbe = `package main + +import ( + "runtime" + "runtime/debug" + "strconv" + "strings" +) + +type Wrapper struct { + a []int +} + +func (w Wrapper) Get(i int) int { + return w.a[i] +} + +func main() { + checkCallerStatement() + checkCallersFramesStatement() + checkAdjacentRuntimeStack() + checkRecoveredDebugStackBounds() +} + +//go:noinline +func checkCallerStatement() { + _, file, line, ok := runtime.Caller(0) // CALLER_STMT_MARK + if !ok || !strings.HasSuffix(file, "main.go") || line != CALLER_STMT_LINE { + panic("bad caller statement: " + file + ":" + strconv.Itoa(line)) + } +} + +//go:noinline +func checkCallersFramesStatement() { + var pcs [16]uintptr + n := runtime.Callers(0, pcs[:]) // CALLERS_STMT_MARK + frames := runtime.CallersFrames(pcs[:n]) + for { + frame, more := frames.Next() + if frame.Function == "main.checkCallersFramesStatement" { + if !strings.HasSuffix(frame.File, "main.go") || frame.Line != CALLERS_STMT_LINE { + panic("bad callers frame: " + frame.File + ":" + strconv.Itoa(frame.Line)) + } + fn := runtime.FuncForPC(frame.PC - 1) + if fn == nil || fn.Name() != "main.checkCallersFramesStatement" { + name := "" + if fn != nil { + name = fn.Name() + } + panic("bad FuncForPC(pc-1): " + name) + } + file, line := fn.FileLine(frame.PC - 1) + if !strings.HasSuffix(file, "main.go") || line != CALLERS_STMT_LINE { + panic("bad Func.FileLine(pc-1): " + file + ":" + strconv.Itoa(line)) + } + return + } + if !more { + break + } + } + panic("missing callers frame") +} + +//go:noinline +func checkAdjacentRuntimeStack() { + var buf1, buf2 [4096]byte + n1 := runtime.Stack(buf1[:], false) // STACK_ONE_MARK + n2 := runtime.Stack(buf2[:], false) // STACK_TWO_MARK + line1 := stackLineFor(string(buf1[:n1]), "main.checkAdjacentRuntimeStack") + line2 := stackLineFor(string(buf2[:n2]), "main.checkAdjacentRuntimeStack") + if line1 
!= STACK_ONE_LINE || line2 != STACK_TWO_LINE || line1+1 != line2 { + panic("bad adjacent stack lines: " + strconv.Itoa(line1) + "," + strconv.Itoa(line2)) + } +} + +//go:noinline +func checkRecoveredDebugStackBounds() { + defer func() { + if recover() == nil { + panic("missing bounds panic") + } + stack := string(debug.Stack()) + if !strings.Contains(stack, "main.go:BOUNDS_LINE") { + panic("bad recovered stack: " + stack) + } + }() + foo := Wrapper{a: []int{0, 1, 2}} + _ = foo.Get(3) // BOUNDS_MARK +} + +func stackLineFor(stack, fn string) int { + lines := strings.Split(stack, "\n") + for i := 0; i+1 < len(lines); i++ { + if strings.TrimSpace(lines[i]) == fn+"()" { + loc := strings.TrimSpace(lines[i+1]) + colon := strings.LastIndexByte(loc, ':') + if colon < 0 { + return 0 + } + rest := loc[colon+1:] + end := strings.IndexByte(rest, ' ') + if end >= 0 { + rest = rest[:end] + } + n, _ := strconv.Atoi(rest) + return n + } + } + return 0 +} +` + +func TestRuntimeStatementLineInfo(t *testing.T) { + source := runtimeStatementLineProbe + source = strings.ReplaceAll(source, "CALLER_STMT_LINE", strconv.Itoa(markerLine(source, "CALLER_STMT_MARK"))) + source = strings.ReplaceAll(source, "CALLERS_STMT_LINE", strconv.Itoa(markerLine(source, "CALLERS_STMT_MARK"))) + source = strings.ReplaceAll(source, "STACK_ONE_LINE", strconv.Itoa(markerLine(source, "STACK_ONE_MARK"))) + source = strings.ReplaceAll(source, "STACK_TWO_LINE", strconv.Itoa(markerLine(source, "STACK_TWO_MARK"))) + source = strings.ReplaceAll(source, "BOUNDS_LINE", strconv.Itoa(markerLine(source, "BOUNDS_MARK"))) + + dir := t.TempDir() + file := filepath.Join(dir, "main.go") + if err := os.WriteFile(file, []byte(source), 0644); err != nil { + t.Fatal(err) + } + + repoRoot := findStringConversionRepoRoot(t) + t.Setenv("LLGO_ROOT", repoRoot) + cmd := exec.Command("go", "run", "./cmd/llgo", "run", "-a", file) + cmd.Dir = repoRoot + if out, err := cmd.CombinedOutput(); err != nil { + t.Fatalf("llgo statement line probe 
failed: %v\n%s", err, out) + } +} From 407e0dd8a5ffc2986e85803feefe7cba85d59324 Mon Sep 17 00:00:00 2001 From: Li Jie Date: Tue, 30 Jun 2026 12:42:11 +0800 Subject: [PATCH 05/59] runtime: compress funcinfo table --- cl/caller_frame_test.go | 56 ++- cl/compile.go | 36 +- cl/instr.go | 178 +++++--- internal/build/funcinfo/funcinfo.go | 264 ++++++++++-- internal/build/funcinfo/funcinfo_test.go | 154 +++++-- internal/build/funcinfo_table.go | 87 ++-- internal/build/funcinfo_table_test.go | 14 +- runtime/internal/lib/runtime/extern.go | 1 + .../lib/runtime/pprof_runtime_stub_llgo.go | 65 ++- runtime/internal/lib/runtime/symtab.go | 128 +++++- runtime/internal/runtime/caller.go | 404 +++++++++++++----- 11 files changed, 1067 insertions(+), 320 deletions(-) diff --git a/cl/caller_frame_test.go b/cl/caller_frame_test.go index 965086fd2d..4239a062a9 100644 --- a/cl/caller_frame_test.go +++ b/cl/caller_frame_test.go @@ -135,6 +135,9 @@ import "runtime" import "runtime/debug" func direct() { runtime.Caller(0) } +func indirect() { direct() } +func dynamic(f func()) { f() } +func dynamicCaller() { dynamic(direct) } func stack() { _ = debug.Stack() } func anonOnly() { func() { runtime.FuncForPC(0) }() } func plain() {} @@ -145,6 +148,9 @@ func plain() {} if !fnUsesRuntimeCaller(ssapkg.Func("direct")) { t.Fatal("direct runtime.Caller use should be detected") } + if !fnUsesRuntimeCaller(ssapkg.Func("indirect")) { + t.Fatal("transitive runtime.Caller use should be detected") + } if !fnUsesRuntimeCaller(ssapkg.Func("stack")) { t.Fatal("runtime/debug.Stack use should be detected") } @@ -154,6 +160,15 @@ func plain() {} if fnUsesRuntimeCaller(ssapkg.Func("plain")) { t.Fatal("plain function should not report runtime caller usage") } + runtimeCallerFuncs := runtimeCallerFuncSet(ssapkg) + for _, name := range []string{"dynamic", "dynamicCaller"} { + if !runtimeCallerFuncs[ssapkg.Func(name)] { + t.Fatalf("%s should be tracked because dynamic calls may reach runtime stack APIs", name) + } 
+ } + if runtimeCallerFuncs[ssapkg.Func("plain")] { + t.Fatal("plain function should not be tracked") + } for _, name := range []string{"Caller", "Callers", "CallersFrames", "FuncForPC", "Stack"} { if !isRuntimeCallerName(name) { @@ -208,6 +223,10 @@ func TestCallerFrameTrackingEligibility(t *testing.T) { } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { + ssapkg, _ := buildCallerFrameSSAPackage(t, tt.pkgPath, `package foo +import "runtime" +func f() { runtime.Caller(0) } +`) prog := llssa.NewProgram(nil) if tt.targetName != "" { prog.Target().Target = tt.targetName @@ -217,7 +236,15 @@ func TestCallerFrameTrackingEligibility(t *testing.T) { } pkg := prog.NewPackage("foo", tt.pkgPath) fn := pkg.NewFunc("f", llssa.NoArgsNoRet, llssa.InGo) - ctx := &context{prog: prog, pkg: pkg, fn: fn, trackCallerFrames: tt.track} + goFn := ssapkg.Func("f") + ctx := &context{ + prog: prog, + pkg: pkg, + fn: fn, + goFn: goFn, + trackCallerFrames: tt.track, + runtimeCallerFuncs: runtimeCallerFuncSet(ssapkg), + } if got := ctx.shouldTrackCallerFrames(); got != tt.want { t.Fatalf("shouldTrackCallerFrames() = %v, want %v", got, tt.want) } @@ -276,15 +303,18 @@ func f() { } ir := pkg.Module().String() for _, want := range []string{ - "PushCallerFrame", - "SetCallerLookupLine", - "PopCallerFrame", + "RecordCallerLocation", `c"example.com/foo.f`, } { if !strings.Contains(ir, want) { t.Fatalf("compiled caller-frame IR missing %q:\n%s", want, ir) } } + for _, old := range []string{"PushCallerFrame", "SetCallerLine", "PopCallerFrame"} { + if strings.Contains(ir, old) { + t.Fatalf("compiled caller-frame IR still contains old %q instrumentation:\n%s", old, ir) + } + } } func TestCompileRuntimeCallerFrameUsesGoNameForLinkname(t *testing.T) { @@ -325,8 +355,8 @@ func f() { if err != nil { t.Fatal(err) } - if ir := pkg.Module().String(); strings.Contains(ir, "PushCallerFrame") { - t.Fatalf("target builds should not emit caller-frame tracking:\n%s", ir) + if ir := 
pkg.Module().String(); strings.Contains(ir, "RecordCallerLocation") || strings.Contains(ir, "RecordPanicLocation") { + t.Fatalf("target builds should not emit caller location tracking:\n%s", ir) } ssapkg, files = buildCallerFrameSSAPackage(t, "example.com/foo", `package foo @@ -337,12 +367,12 @@ func f() {} if err != nil { t.Fatal(err) } - if ir := pkg.Module().String(); strings.Contains(ir, "PushCallerFrame") || strings.Contains(ir, "SetCallerLine") { - t.Fatalf("packages without runtime stack APIs should not emit caller-frame tracking:\n%s", ir) + if ir := pkg.Module().String(); strings.Contains(ir, "RecordCallerLocation") || strings.Contains(ir, "RecordPanicLocation") { + t.Fatalf("packages without runtime stack APIs should not emit caller location tracking:\n%s", ir) } } -func TestCompileRuntimeCallerLookupTokenOnlyForRuntimeAPIs(t *testing.T) { +func TestCompileRuntimeCallerLocationOnlyForRuntimePaths(t *testing.T) { ssapkg, files := buildCallerFrameSSAPackage(t, "example.com/foo", `package foo import "runtime" @@ -359,10 +389,10 @@ func f() { t.Fatal(err) } ir := pkg.Module().String() - if !strings.Contains(ir, "SetCallerLookupLine") { - t.Fatalf("runtime.Caller should enable caller lookup:\n%s", ir) + if !strings.Contains(ir, "RecordCallerLocation") { + t.Fatalf("runtime.Caller should record caller location:\n%s", ir) } - if !strings.Contains(ir, "SetCallerLine") { - t.Fatalf("ordinary calls in an instrumented package should only update the current line:\n%s", ir) + if strings.Contains(ir, "SetCallerLine") || strings.Contains(ir, "PushCallerFrame") { + t.Fatalf("caller location tracking should not emit old TLS instrumentation:\n%s", ir) } } diff --git a/cl/compile.go b/cl/compile.go index 27282c7461..20531efc4d 100644 --- a/cl/compile.go +++ b/cl/compile.go @@ -176,6 +176,7 @@ type context struct { stackDefers map[*ssa.Function]bool anonDefers map[*ssa.Function]bool paramDIVars map[*types.Var]llssa.DIVar + runtimeCallerFuncs map[*ssa.Function]bool patches 
Patches blkInfos []blocks.Info @@ -747,7 +748,7 @@ func (p *context) compileBlock(b llssa.Builder, block *ssa.BasicBlock, n int, do var ret = fn.Block(block.Index) b.SetBlock(ret) if block.Index == 0 && p.shouldTrackCallerFrames() { - p.pushCallerFrame(b, block.Parent()) + p.pushCallerLocationFrame(b, block.Parent()) } if block.Index == 0 && enableCallTracing && !strings.HasPrefix(fn.Name(), "github.com/goplus/llgo/runtime/internal/runtime.Print") { b.Printf("call " + fn.Name() + "\n\x00") @@ -1137,7 +1138,7 @@ func (p *context) compileInstrOrValue(b llssa.Builder, iv instrOrValue, asValue if t := p.type_(v.Type(), llssa.InGo); t.RawType() != nil { if p.isLargeNonPointerValue(t) { x := p.compileValue(b, v.X) - p.setCallerLine(b, v.Pos()) + p.recordPanicLocation(b, v.Pos()) p.assertNilDerefBase(b, v.X) b.AssertNilDeref(x) return @@ -1151,7 +1152,7 @@ func (p *context) compileInstrOrValue(b llssa.Builder, iv instrOrValue, asValue // Zero-length slice-to-array conversions can leave only // an unused slice deref; preserve its required nil check. 
x := p.compileValue(b, v.X) - p.setCallerLine(b, v.Pos()) + p.recordPanicLocation(b, v.Pos()) p.assertNilDerefBase(b, v.X) b.AssertNilDeref(x) return @@ -1183,7 +1184,7 @@ func (p *context) compileInstrOrValue(b llssa.Builder, iv instrOrValue, asValue } x := p.compileValue(b, v.X) if v.Op != token.ARROW { - p.setCallerLine(b, v.Pos()) + p.recordPanicLocation(b, v.Pos()) } if shouldAssertDirectNilDeref(v) { b.AssertNilDeref(x) @@ -1220,7 +1221,7 @@ func (p *context) compileInstrOrValue(b llssa.Builder, iv instrOrValue, asValue ret = b.Convert(p.type_(t, llssa.InGo), x) case *ssa.FieldAddr: x := p.compileValue(b, v.X) - p.setCallerLine(b, v.Pos()) + p.recordPanicLocation(b, v.Pos()) if p.isAddressOfFieldAddr(v) { b.AssertNilDeref(x) } @@ -1242,12 +1243,12 @@ func (p *context) compileInstrOrValue(b llssa.Builder, iv instrOrValue, asValue } x := p.compileValue(b, vx) idx := p.compileValue(b, v.Index) - p.setCallerLine(b, v.Pos()) + p.recordPanicLocation(b, v.Pos()) ret = b.IndexAddr(x, idx) case *ssa.Index: x := p.compileValue(b, v.X) idx := p.compileValue(b, v.Index) - p.setCallerLine(b, v.Pos()) + p.recordPanicLocation(b, v.Pos()) ret = b.Index(x, idx, func() (addr llssa.Expr, zero bool) { switch n := v.X.(type) { case *ssa.Const: @@ -1281,7 +1282,7 @@ func (p *context) compileInstrOrValue(b llssa.Builder, iv instrOrValue, asValue if v.Max != nil { max = p.compileValue(b, v.Max) } - p.setCallerLine(b, v.Pos()) + p.recordPanicLocation(b, v.Pos()) ret = b.Slice(x, low, high, max) ret.Type = p.type_(v.Type(), llssa.InGo) case *ssa.MakeInterface: @@ -1338,7 +1339,7 @@ func (p *context) compileInstrOrValue(b llssa.Builder, iv instrOrValue, asValue case *ssa.TypeAssert: x := p.compileValue(b, v.X) t := p.type_(v.AssertedType, llssa.InGo) - p.setCallerLine(b, v.Pos()) + p.recordPanicLocation(b, v.Pos()) ret = b.TypeAssert(x, t, v.CommaOk) case *ssa.Extract: x := p.compileValue(b, v.Tuple) @@ -1379,7 +1380,7 @@ func (p *context) compileInstrOrValue(b llssa.Builder, iv 
instrOrValue, asValue case *ssa.SliceToArrayPointer: t := p.type_(v.Type(), llssa.InGo) x := p.compileValue(b, v.X) - p.setCallerLine(b, v.Pos()) + p.recordPanicLocation(b, v.Pos()) ret = b.SliceToArrayPointer(x, t) default: panic(fmt.Sprintf("compileInstrAndValue: unknown instr - %T\n", iv)) @@ -1514,11 +1515,11 @@ func (p *context) compileInstr(b llssa.Builder, instr ssa.Instruction) { } } if p.returnNeedsImplicitRunDefers(v) { - p.setCallerLine(b, v.Pos()) + p.recordPanicLocation(b, v.Pos()) b.RunDefers() } if p.shouldTrackCallerFrames() { - p.popCallerFrame(b) + p.popCallerLocationFrame(b) } b.Return(results...) case *ssa.If: @@ -1532,7 +1533,7 @@ func (p *context) compileInstr(b llssa.Builder, instr ssa.Instruction) { m := p.compileValue(b, v.Map) key := p.compileValue(b, v.Key) val := p.compileValue(b, v.Value) - p.setCallerLine(b, v.Pos()) + p.recordPanicLocation(b, v.Pos()) b.MapUpdate(m, key, val) case *ssa.Defer: if v.DeferStack != nil { @@ -1543,16 +1544,16 @@ func (p *context) compileInstr(b llssa.Builder, instr ssa.Instruction) { case *ssa.Go: p.call(b, llssa.Go, &v.Call) case *ssa.RunDefers: - p.setCallerLine(b, v.Pos()) + p.recordPanicLocation(b, v.Pos()) b.RunDefers() case *ssa.Panic: arg := p.compileValue(b, v.X) - p.setCallerLine(b, v.Pos()) + p.recordPanicLocation(b, v.Pos()) b.Panic(arg) case *ssa.Send: ch := p.compileValue(b, v.Chan) x := p.compileValue(b, v.X) - p.setCallerLine(b, v.Pos()) + p.recordPanicLocation(b, v.Pos()) b.Send(ch, x) case *ssa.DebugRef: if enableDbgSyms && v.Parent().Origin() == nil { @@ -1869,7 +1870,8 @@ func newPackageEx(prog llssa.Program, patches Patches, rewrites map[string]strin cgoSymbols: make([]string, 0, 128), rewrites: rewrites, - trackCallerFrames: filesUseRuntimeCaller(files) || packageUsesRuntimeCaller(pkg), + trackCallerFrames: filesUseRuntimeCaller(files) || packageUsesRuntimeCaller(pkg), + runtimeCallerFuncs: runtimeCallerFuncSet(pkg), } if embedMap != nil { ctx.embedMap = *embedMap diff --git 
a/cl/instr.go b/cl/instr.go index 0c6a2d69a6..357107167b 100644 --- a/cl/instr.go +++ b/cl/instr.go @@ -854,7 +854,10 @@ func (p *context) sourceLine(filename string, line int) (string, bool) { } func (p *context) shouldTrackCallerFrames() bool { - if p == nil || p.pkg == nil || p.fn == nil || !p.trackCallerFrames { + if p == nil || p.pkg == nil || p.fn == nil || p.goFn == nil || !p.trackCallerFrames { + return false + } + if !p.runtimeCallerFuncs[p.goFn] { return false } if target := p.prog.Target(); target != nil && (target.Target != "" || target.GOARCH == "wasm") { @@ -875,19 +878,50 @@ func isStandardLibraryPackage(pkgPath string) bool { } func packageUsesRuntimeCaller(pkg *ssa.Package) bool { + return len(runtimeCallerFuncSet(pkg)) != 0 +} + +func fnUsesRuntimeCaller(fn *ssa.Function) bool { + return runtimeCallerFuncSetFor(fn, make(map[*ssa.Function]bool), make(map[*ssa.Function]bool), false) +} + +func runtimeCallerFuncSet(pkg *ssa.Package) map[*ssa.Function]bool { if pkg == nil { - return false + return nil + } + dynamicCallsMayReachRuntimeCaller := packageHasDirectRuntimeCaller(pkg) + if !dynamicCallsMayReachRuntimeCaller { + return nil } + memo := make(map[*ssa.Function]bool) + visiting := make(map[*ssa.Function]bool) for _, member := range pkg.Members { - fn, ok := member.(*ssa.Function) - if ok && fnUsesRuntimeCaller(fn) { + if fn, ok := member.(*ssa.Function); ok { + runtimeCallerFuncSetFor(fn, memo, visiting, dynamicCallsMayReachRuntimeCaller) + } + } + out := make(map[*ssa.Function]bool) + for fn, ok := range memo { + if ok { + out[fn] = true + } + } + if len(out) == 0 { + return nil + } + return out +} + +func packageHasDirectRuntimeCaller(pkg *ssa.Package) bool { + for _, member := range pkg.Members { + if fn, ok := member.(*ssa.Function); ok && fnHasDirectRuntimeCaller(fn) { return true } } return false } -func fnUsesRuntimeCaller(fn *ssa.Function) bool { +func fnHasDirectRuntimeCaller(fn *ssa.Function) bool { if fn == nil { return false } @@ 
-903,10 +937,50 @@ func fnUsesRuntimeCaller(fn *ssa.Function) bool { } } for _, anon := range fn.AnonFuncs { - if fnUsesRuntimeCaller(anon) { + if fnHasDirectRuntimeCaller(anon) { + return true + } + } + return false +} + +func runtimeCallerFuncSetFor(fn *ssa.Function, memo, visiting map[*ssa.Function]bool, dynamicCallsMayReachRuntimeCaller bool) bool { + if fn == nil { + return false + } + if ok, done := memo[fn]; done { + return ok + } + if visiting[fn] { + return false + } + visiting[fn] = true + defer delete(visiting, fn) + for _, block := range fn.Blocks { + for _, instr := range block.Instrs { + call, ok := instr.(ssa.CallInstruction) + if !ok { + continue + } + callee := call.Common().StaticCallee() + if callee == nil && dynamicCallsMayReachRuntimeCaller { + memo[fn] = true + return true + } + if isRuntimeCallerFunc(callee) || + (callee != nil && callee.Pkg == fn.Pkg && runtimeCallerFuncSetFor(callee, memo, visiting, dynamicCallsMayReachRuntimeCaller)) { + memo[fn] = true + return true + } + } + } + for _, anon := range fn.AnonFuncs { + if runtimeCallerFuncSetFor(anon, memo, visiting, dynamicCallsMayReachRuntimeCaller) { + memo[fn] = true return true } } + memo[fn] = false return false } @@ -949,21 +1023,6 @@ func isRuntimeCallerName(name string) bool { } } -func (p *context) pushCallerFrame(b llssa.Builder, fn *ssa.Function) { - if fn == nil { - return - } - pos := p.fset.Position(fn.Pos()) - entry := b.Convert(p.prog.Uintptr(), p.fn.Expr) - p.callerFrameMark = b.Call( - p.runtimeFunc("PushCallerFrame", pushCallerFrameSig()), - entry, - b.Str(p.runtimeCallerFrameName()), - b.Str(pos.Filename), - p.prog.IntVal(uint64(pos.Line), p.prog.Int()), - ) -} - func (p *context) runtimeCallerFrameName() string { if p == nil { return "" @@ -977,43 +1036,63 @@ func (p *context) runtimeCallerFrameName() string { return "" } -func (p *context) setCallerLine(b llssa.Builder, pos token.Pos) { - if !p.shouldTrackCallerFrames() { +func (p *context) pushCallerLocationFrame(b 
llssa.Builder, fn *ssa.Function) { + if fn == nil { return } - line := p.fset.Position(pos).Line - p.setCallerLineNumber(b, line) + pos := p.fset.Position(fn.Pos()) + entry := b.Convert(p.prog.Uintptr(), p.fn.Expr) + p.callerFrameMark = b.Call( + p.runtimeFunc("PushCallerLocationFrame", pushCallerLocationFrameSig()), + entry, + b.Str(p.runtimeCallerFrameName()), + b.Str(pos.Filename), + p.prog.IntVal(uint64(pos.Line), p.prog.Int()), + ) +} + +func (p *context) recordCallerLocation(b llssa.Builder, pos token.Pos) { + p.recordRuntimeLocation(b, pos, "RecordCallerLocation") } -func (p *context) setCallerLineForCall(b llssa.Builder, call *ssa.CallCommon) { +func (p *context) recordPanicLocation(b llssa.Builder, pos token.Pos) { + p.recordRuntimeLocation(b, pos, "RecordPanicLocation") +} + +func (p *context) recordRuntimeLocation(b llssa.Builder, pos token.Pos, fn string) { if !p.shouldTrackCallerFrames() { return } - line := p.fset.Position(call.Pos()).Line - if line <= 0 { + position := p.fset.Position(pos) + if position.Line <= 0 || position.Filename == "" { return } - fn := "SetCallerLine" - sig := setCallerLineSig() - if isRuntimeCallerLookupFunc(call.StaticCallee()) { - fn = "SetCallerLookupLine" - sig = setCallerLookupLineSig() - } - b.Call(p.runtimeFunc(fn, sig), p.prog.IntVal(uint64(line), p.prog.Int())) + b.Call( + p.runtimeFunc(fn, recordRuntimeLocationSig()), + b.Convert(p.prog.Uintptr(), p.fn.Expr), + b.Str(p.runtimeCallerFrameName()), + b.Str(position.Filename), + p.prog.IntVal(uint64(position.Line), p.prog.Int()), + ) } -func (p *context) setCallerLineNumber(b llssa.Builder, line int) { - if line <= 0 { +func (p *context) recordCallerLocationForCall(b llssa.Builder, call *ssa.CallCommon) { + if !p.shouldTrackCallerFrames() { + return + } + callee := call.StaticCallee() + if isRuntimeCallerLookupFunc(callee) { + p.recordCallerLocation(b, call.Pos()) return } - b.Call(p.runtimeFunc("SetCallerLine", setCallerLineSig()), p.prog.IntVal(uint64(line), 
p.prog.Int())) + p.recordPanicLocation(b, call.Pos()) } -func (p *context) popCallerFrame(b llssa.Builder) { +func (p *context) popCallerLocationFrame(b llssa.Builder) { if p.callerFrameMark.IsNil() { return } - b.Call(p.runtimeFunc("PopCallerFrame", popCallerFrameSig()), p.callerFrameMark) + b.Call(p.runtimeFunc("PopCallerLocationFrame", popCallerLocationFrameSig()), p.callerFrameMark) } func (p *context) runtimeFunc(name string, sig *types.Signature) llssa.Expr { @@ -1025,7 +1104,7 @@ func (p *context) runtimeFunc(name string, sig *types.Signature) llssa.Expr { return p.pkg.NewFuncEx(fullName, sig, llssa.InGo, false, false).Expr } -func pushCallerFrameSig() *types.Signature { +func pushCallerLocationFrameSig() *types.Signature { return types.NewSignatureType(nil, nil, nil, types.NewTuple( types.NewVar(token.NoPos, nil, "entry", types.Typ[types.Uintptr]), @@ -1038,19 +1117,20 @@ func pushCallerFrameSig() *types.Signature { ) } -func setCallerLineSig() *types.Signature { +func recordRuntimeLocationSig() *types.Signature { return types.NewSignatureType(nil, nil, nil, - types.NewTuple(types.NewVar(token.NoPos, nil, "line", types.Typ[types.Int])), + types.NewTuple( + types.NewVar(token.NoPos, nil, "entry", types.Typ[types.Uintptr]), + types.NewVar(token.NoPos, nil, "name", types.Typ[types.String]), + types.NewVar(token.NoPos, nil, "file", types.Typ[types.String]), + types.NewVar(token.NoPos, nil, "line", types.Typ[types.Int]), + ), nil, false, ) } -func setCallerLookupLineSig() *types.Signature { - return setCallerLineSig() -} - -func popCallerFrameSig() *types.Signature { +func popCallerLocationFrameSig() *types.Signature { return types.NewSignatureType(nil, nil, nil, types.NewTuple(types.NewVar(token.NoPos, nil, "mark", types.Typ[types.Int])), nil, @@ -1275,7 +1355,7 @@ func collectMethodNilDerefChecks(fn *ssa.Function) map[*ssa.UnOp]none { } func (p *context) callEx(b llssa.Builder, act llssa.DoAction, call *ssa.CallCommon, ds *explicitDeferStack) (ret llssa.Expr) 
{ - p.setCallerLineForCall(b, call) + p.recordCallerLocationForCall(b, call) cv := call.Value if mthd := call.Method; mthd != nil { reflectCheck := p.reflectTypeMethodCheck(call, mthd) diff --git a/internal/build/funcinfo/funcinfo.go b/internal/build/funcinfo/funcinfo.go index c092b606e9..c6043484c6 100644 --- a/internal/build/funcinfo/funcinfo.go +++ b/internal/build/funcinfo/funcinfo.go @@ -32,56 +32,62 @@ type Record struct { } type EncodedRecord struct { - Symbol uint32 - Name uint32 - File uint32 - Line uint32 - Column uint32 + SymbolPkg uint16 + SymbolName uint16 + NamePkg uint16 + NameName uint16 + FileRoot uint16 + FileName uint16 + Line uint32 } type Table struct { - Records []EncodedRecord - Strings []byte - Hash []uint32 + Records []EncodedRecord + StringOffsets []uint32 + Strings []byte + Hash []uint16 } func Encode(records []Record) (Table, error) { if len(records) == 0 { return Table{}, nil } - pool := stringPool{ - offsets: map[string]uint32{"": 0}, - data: []byte{0}, - text: "\x00", - } - for _, s := range collectStrings(records) { - if _, err := pool.offset(s); err != nil { - return Table{}, err - } + ids, offsets, strings, err := buildStringTable(collectStrings(records)) + if err != nil { + return Table{}, err } out := Table{ - Records: make([]EncodedRecord, 0, len(records)), + Records: make([]EncodedRecord, 0, len(records)), + StringOffsets: offsets, + Strings: strings, } for _, rec := range records { + symPkg, symName := splitQualifiedName(rec.Symbol) + namePkg, nameName := splitQualifiedName(rec.Name) + fileRoot, fileName := splitFileName(rec.File) out.Records = append(out.Records, EncodedRecord{ - Symbol: pool.offsets[rec.Symbol], - Name: pool.offsets[rec.Name], - File: pool.offsets[rec.File], - Line: rec.Line, - Column: rec.Column, + SymbolPkg: ids[symPkg], + SymbolName: ids[symName], + NamePkg: ids[namePkg], + NameName: ids[nameName], + FileRoot: ids[fileRoot], + FileName: ids[fileName], + Line: rec.Line, }) } - out.Strings = pool.data - 
out.Hash = buildHash(records) + out.Hash, err = buildHash(records) + if err != nil { + return Table{}, err + } return out, nil } func collectStrings(records []Record) []string { seen := make(map[string]bool) for _, rec := range records { - seen[rec.Symbol] = true - seen[rec.Name] = true - seen[rec.File] = true + for _, s := range splitRecordStrings(rec) { + seen[s] = true + } } delete(seen, "") out := make([]string, 0, len(seen)) @@ -97,6 +103,69 @@ func collectStrings(records []Record) []string { return out } +func splitRecordStrings(rec Record) []string { + symPkg, symName := splitQualifiedName(rec.Symbol) + namePkg, nameName := splitQualifiedName(rec.Name) + fileRoot, fileName := splitFileName(rec.File) + return []string{symPkg, symName, namePkg, nameName, fileRoot, fileName} +} + +func buildStringTable(strings []string) (map[string]uint16, []uint32, []byte, error) { + ids := map[string]uint16{"": 0} + values := []string{""} + for _, s := range strings { + if _, ok := ids[s]; ok { + continue + } + if len(values) > math.MaxUint16 { + return nil, nil, nil, fmt.Errorf("funcinfo string id table exceeds 65535 entries") + } + ids[s] = uint16(len(values)) + values = append(values, s) + } + pool := stringPool{ + offsets: map[string]uint32{"": 0}, + data: []byte{0}, + text: "\x00", + } + offsets := make([]uint32, len(values)) + for id, s := range values { + off, err := pool.offset(s) + if err != nil { + return nil, nil, nil, err + } + offsets[id] = off + } + return ids, offsets, pool.data, nil +} + +func splitQualifiedName(name string) (pkg, local string) { + if name == "" { + return "", "" + } + start := strings.LastIndexByte(name, '/') + if start < 0 { + start = 0 + } else { + start++ + } + if dot := strings.IndexByte(name[start:], '.'); dot >= 0 { + idx := start + dot + return name[:idx], name[idx+1:] + } + return "", name +} + +func splitFileName(file string) (root, name string) { + if file == "" { + return "", "" + } + if slash := strings.LastIndexByte(file, '/'); 
slash >= 0 { + return file[:slash+1], file[slash+1:] + } + return "", file +} + type stringPool struct { offsets map[string]uint32 data []byte @@ -123,23 +192,26 @@ func (p *stringPool) offset(s string) (uint32, error) { return off, nil } -func buildHash(records []Record) []uint32 { +func buildHash(records []Record) ([]uint16, error) { if len(records) == 0 { - return nil + return nil, nil + } + if len(records) > math.MaxUint16 { + return nil, nil } buckets := 1 for buckets*3 < len(records)*4 { buckets <<= 1 } - hash := make([]uint32, buckets) + hash := make([]uint16, buckets) for i, rec := range records { slot := int(HashString(rec.Symbol) & uint32(buckets-1)) for hash[slot] != 0 { slot = (slot + 1) & (buckets - 1) } - hash[slot] = uint32(i + 1) + hash[slot] = uint16(i + 1) } - return hash + return hash, nil } func HashString(s string) uint32 { @@ -154,3 +226,129 @@ func HashString(s string) uint32 { } return h } + +func (t Table) String(id uint16) string { + if int(id) >= len(t.StringOffsets) { + return "" + } + return cstring(t.Strings, t.StringOffsets[id]) +} + +func (t Table) Symbol(rec EncodedRecord) string { + return joinQualified(t.String(rec.SymbolPkg), t.String(rec.SymbolName)) +} + +func (t Table) Name(rec EncodedRecord) string { + return joinQualified(t.String(rec.NamePkg), t.String(rec.NameName)) +} + +func (t Table) File(rec EncodedRecord) string { + return t.String(rec.FileRoot) + t.String(rec.FileName) +} + +func (t Table) LookupSymbol(symbol string) (int, bool) { + if len(t.Hash) == 0 { + return 0, false + } + mask := uint32(len(t.Hash) - 1) + slot := HashString(symbol) & mask + for probes := 0; probes < len(t.Hash); probes++ { + idx := t.Hash[slot] + if idx == 0 { + return 0, false + } + rec := t.Records[idx-1] + if t.Symbol(rec) == symbol { + return int(idx - 1), true + } + slot = (slot + 1) & mask + } + return 0, false +} + +func (t Table) SizeBytes() int { + return len(t.Records)*16 + len(t.StringOffsets)*4 + len(t.Strings) + len(t.Hash)*2 +} + 
+func joinQualified(pkg, local string) string { + if pkg == "" { + return local + } + if local == "" { + return pkg + } + return pkg + "." + local +} + +func cstring(data []byte, off uint32) string { + end := int(off) + for end < len(data) && data[end] != 0 { + end++ + } + return string(data[off:end]) +} + +type PCIndex struct { + PageShift uint + Base uint64 + Pages []uint32 +} + +const DefaultPCPageShift = 12 + +func BuildPCIndex(entries []uint64) PCIndex { + return BuildPCIndexWithShift(entries, DefaultPCPageShift) +} + +func BuildPCIndexWithShift(entries []uint64, shift uint) PCIndex { + if len(entries) == 0 { + return PCIndex{PageShift: shift} + } + base := entries[0] >> shift + last := entries[len(entries)-1] >> shift + pages := make([]uint32, last-base+2) + next := 0 + for page := range pages { + limit := (base + uint64(page)) << shift + for next < len(entries) && entries[next] < limit { + next++ + } + pages[page] = uint32(next) + } + return PCIndex{ + PageShift: shift, + Base: base, + Pages: pages, + } +} + +func LookupPC(entries []uint64, index PCIndex, pc uint64) int { + if len(entries) == 0 { + return -1 + } + lo, hi := 0, len(entries) + page := pc >> index.PageShift + if len(index.Pages) != 0 && page >= index.Base { + off := page - index.Base + if off < uint64(len(index.Pages)) { + lo = int(index.Pages[off]) + if off+1 < uint64(len(index.Pages)) { + hi = int(index.Pages[off+1]) + } + if lo > 0 { + lo-- + } + if hi < len(entries) { + hi++ + } + } + } + i := sort.Search(hi-lo, func(i int) bool { + return entries[lo+i] > pc + }) + idx := lo + i - 1 + if idx < 0 { + return -1 + } + return idx +} diff --git a/internal/build/funcinfo/funcinfo_test.go b/internal/build/funcinfo/funcinfo_test.go index 7bc92ec8b1..238543d3e9 100644 --- a/internal/build/funcinfo/funcinfo_test.go +++ b/internal/build/funcinfo/funcinfo_test.go @@ -29,27 +29,27 @@ func TestEncodePoolsStringsAndBuildsHash(t *testing.T) { if len(table.Records) != 2 { t.Fatalf("encoded records = %d, 
want 2", len(table.Records)) } - if table.Records[0].File == table.Records[1].File { - t.Fatalf("suffix sharing should not collapse distinct file strings to the same offset") + if table.Records[0].FileRoot == table.Records[1].FileRoot { + t.Fatalf("distinct file roots should use distinct ids") } - if got := cstring(table.Strings, table.Records[1].File); got != "shared.go" { + if got := table.File(table.Records[1]); got != "shared.go" { t.Fatalf("suffix file string = %q, want shared.go", got) } if len(table.Hash) == 0 || len(table.Hash)&(len(table.Hash)-1) != 0 { t.Fatalf("hash bucket count = %d, want power-of-two non-zero", len(table.Hash)) } - if idx, ok := lookup(table, "example.com/p.a"); !ok || idx != 0 { + if idx, ok := table.LookupSymbol("example.com/p.a"); !ok || idx != 0 { t.Fatalf("lookup a = %d, %v; want 0, true", idx, ok) } - if idx, ok := lookup(table, "example.com/p.b"); !ok || idx != 1 { + if idx, ok := table.LookupSymbol("example.com/p.b"); !ok || idx != 1 { t.Fatalf("lookup b = %d, %v; want 1, true", idx, ok) } - if _, ok := lookup(table, "missing"); ok { + if _, ok := table.LookupSymbol("missing"); ok { t.Fatalf("lookup missing succeeded") } } -func TestEncodeUsesUint32Records(t *testing.T) { +func TestEncodeRoundTripsSingleRecord(t *testing.T) { table, err := Encode([]Record{{Symbol: "s", Name: "n", File: "f", Line: 1, Column: 2}}) if err != nil { t.Fatal(err) @@ -58,17 +58,17 @@ func TestEncodeUsesUint32Records(t *testing.T) { t.Fatalf("records = %d, want %d", got, want) } rec := table.Records[0] - if got, want := cstring(table.Strings, rec.Symbol), "s"; got != want { + if got, want := table.Symbol(rec), "s"; got != want { t.Fatalf("symbol = %q, want %q", got, want) } - if got, want := cstring(table.Strings, rec.Name), "n"; got != want { + if got, want := table.Name(rec), "n"; got != want { t.Fatalf("name = %q, want %q", got, want) } - if got, want := cstring(table.Strings, rec.File), "f"; got != want { + if got, want := table.File(rec), "f"; got 
!= want { t.Fatalf("file = %q, want %q", got, want) } - if rec.Line != 1 || rec.Column != 2 { - t.Fatalf("source position = %d:%d, want 1:2", rec.Line, rec.Column) + if rec.Line != 1 { + t.Fatalf("source line = %d, want 1", rec.Line) } } @@ -81,14 +81,101 @@ func TestEncodeHashHandlesCollisions(t *testing.T) { if err != nil { t.Fatal(err) } - if idx, ok := lookup(table, a); !ok || idx != 0 { + if idx, ok := table.LookupSymbol(a); !ok || idx != 0 { t.Fatalf("lookup collision a = %d, %v; want 0, true", idx, ok) } - if idx, ok := lookup(table, b); !ok || idx != 1 { + if idx, ok := table.LookupSymbol(b); !ok || idx != 1 { t.Fatalf("lookup collision b = %d, %v; want 1, true", idx, ok) } } +func TestEncodeOmitsHashWhenRecordIndexesDoNotFitUint16(t *testing.T) { + records := make([]Record, 1<<16) + for i := range records { + records[i] = Record{Symbol: "example.com/p.f", Name: "example.com/p.F"} + } + table, err := Encode(records) + if err != nil { + t.Fatal(err) + } + if table.Hash != nil { + t.Fatalf("hash buckets = %d, want nil fallback for oversized table", len(table.Hash)) + } + if len(table.Records) != len(records) { + t.Fatalf("records = %d, want %d", len(table.Records), len(records)) + } +} + +func TestEncodeSplitsPackageAndFilePrefixes(t *testing.T) { + records := []Record{ + {Symbol: "example.com/p.alpha", Name: "example.com/p.Alpha", File: "/home/me/mod/p/alpha.go", Line: 10}, + {Symbol: "example.com/p.beta", Name: "example.com/p.Beta", File: "/home/me/mod/p/beta.go", Line: 20}, + {Symbol: "example.com/q.gamma", Name: "example.com/q.Gamma", File: "/home/me/mod/q/gamma.go", Line: 30}, + } + table, err := Encode(records) + if err != nil { + t.Fatal(err) + } + for i, rec := range table.Records { + if got := table.Symbol(rec); got != records[i].Symbol { + t.Fatalf("record %d symbol = %q, want %q", i, got, records[i].Symbol) + } + if got := table.Name(rec); got != records[i].Name { + t.Fatalf("record %d name = %q, want %q", i, got, records[i].Name) + } + if got := 
table.File(rec); got != records[i].File { + t.Fatalf("record %d file = %q, want %q", i, got, records[i].File) + } + } + if table.Records[0].SymbolPkg != table.Records[1].SymbolPkg { + t.Fatalf("same package prefix got different ids: %d vs %d", table.Records[0].SymbolPkg, table.Records[1].SymbolPkg) + } + if table.Records[0].FileRoot != table.Records[1].FileRoot { + t.Fatalf("same file root got different ids: %d vs %d", table.Records[0].FileRoot, table.Records[1].FileRoot) + } + if got := table.SizeBytes(); got >= legacySizeBytes(records) { + t.Fatalf("compressed table size = %d, want below legacy %d", got, legacySizeBytes(records)) + } +} + +func TestLookupPCUsesPageIndex(t *testing.T) { + entries := []uint64{0x1000, 0x1010, 0x2800, 0x4000, 0x4010} + index := BuildPCIndex(entries) + tests := []struct { + pc uint64 + want int + }{ + {0xfff, -1}, + {0x1000, 0}, + {0x100f, 0}, + {0x1010, 1}, + {0x27ff, 1}, + {0x2800, 2}, + {0x4018, 4}, + } + for _, tt := range tests { + if got := LookupPC(entries, index, tt.pc); got != tt.want { + t.Fatalf("LookupPC(%#x) = %d, want %d", tt.pc, got, tt.want) + } + } +} + +func BenchmarkLookupPCRandom(b *testing.B) { + entries := make([]uint64, 8192) + for i := range entries { + entries[i] = 0x100000 + uint64(i)*37 + } + index := BuildPCIndex(entries) + var sum int + for i := 0; i < b.N; i++ { + pc := entries[(i*1103515245+12345)&(len(entries)-1)] + uint64(i&31) + sum += LookupPC(entries, index, pc) + } + if sum == 0 { + b.Fatal(sum) + } +} + func collisionPair(t *testing.T) (string, string) { t.Helper() const mask = uint32(3) @@ -105,30 +192,21 @@ func collisionPair(t *testing.T) (string, string) { return "", "" } -func cstring(data []byte, off uint32) string { - end := int(off) - for end < len(data) && data[end] != 0 { - end++ - } - return string(data[off:end]) -} - -func lookup(table Table, symbol string) (int, bool) { - if len(table.Hash) == 0 { - return 0, false - } - mask := uint32(len(table.Hash) - 1) - slot := HashString(symbol) 
& mask - for probes := 0; probes < len(table.Hash); probes++ { - idx := table.Hash[slot] - if idx == 0 { - return 0, false +func legacySizeBytes(records []Record) int { + seen := make(map[string]bool) + stringsBytes := 1 + for _, rec := range records { + for _, s := range []string{rec.Symbol, rec.Name, rec.File} { + if s == "" || seen[s] { + continue + } + seen[s] = true + stringsBytes += len(s) + 1 } - rec := table.Records[idx-1] - if cstring(table.Strings, rec.Symbol) == symbol { - return int(idx - 1), true - } - slot = (slot + 1) & mask } - return 0, false + buckets := 1 + for buckets*3 < len(records)*4 { + buckets <<= 1 + } + return len(records)*20 + stringsBytes + buckets*4 } diff --git a/internal/build/funcinfo_table.go b/internal/build/funcinfo_table.go index 30c63c5fe4..37402229dd 100644 --- a/internal/build/funcinfo_table.go +++ b/internal/build/funcinfo_table.go @@ -26,14 +26,16 @@ import ( ) const ( - funcInfoTableSymbol = "__llgo_funcinfo_table" - funcInfoCountSymbol = "__llgo_funcinfo_count" - funcInfoStringsSymbol = "__llgo_funcinfo_strings" - funcInfoHashSymbol = "__llgo_funcinfo_hash" - funcInfoHashMaskSymbol = "__llgo_funcinfo_hash_mask" - funcInfoDataSymbol = "__llgo_funcinfo_table$data" - funcInfoStringsDataSymbol = "__llgo_funcinfo_strings$data" - funcInfoHashDataSymbol = "__llgo_funcinfo_hash$data" + funcInfoTableSymbol = "__llgo_funcinfo_table" + funcInfoCountSymbol = "__llgo_funcinfo_count" + funcInfoStringsSymbol = "__llgo_funcinfo_strings" + funcInfoStringOffsetsSymbol = "__llgo_funcinfo_string_offsets" + funcInfoHashSymbol = "__llgo_funcinfo_hash" + funcInfoHashMaskSymbol = "__llgo_funcinfo_hash_mask" + funcInfoDataSymbol = "__llgo_funcinfo_table$data" + funcInfoStringsDataSymbol = "__llgo_funcinfo_strings$data" + funcInfoStringOffsetsDataSymbol = "__llgo_funcinfo_string_offsets$data" + funcInfoHashDataSymbol = "__llgo_funcinfo_hash$data" ) type funcInfoRecord struct { @@ -125,24 +127,29 @@ func emitFuncInfoTable(ctx *context, pkg 
llssa.Package, records []funcInfoRecord mod := pkg.Module() llvmCtx := mod.Context() i8Type := llvmCtx.Int8Type() + i16Type := llvmCtx.Int16Type() i32Type := llvmCtx.Int32Type() countType := llvmCtx.IntType(ctx.prog.PointerSize() * 8) recordType := llvmCtx.StructType([]llvm.Type{ - i32Type, - i32Type, - i32Type, - i32Type, + i16Type, + i16Type, + i16Type, + i16Type, + i16Type, + i16Type, i32Type, }, false) tablePtr := llvm.AddGlobal(mod, llvm.PointerType(recordType, 0), funcInfoTableSymbol) stringsPtr := llvm.AddGlobal(mod, llvm.PointerType(i8Type, 0), funcInfoStringsSymbol) - hashPtr := llvm.AddGlobal(mod, llvm.PointerType(i32Type, 0), funcInfoHashSymbol) + stringOffsetsPtr := llvm.AddGlobal(mod, llvm.PointerType(i32Type, 0), funcInfoStringOffsetsSymbol) + hashPtr := llvm.AddGlobal(mod, llvm.PointerType(i16Type, 0), funcInfoHashSymbol) count := llvm.AddGlobal(mod, countType, funcInfoCountSymbol) hashMask := llvm.AddGlobal(mod, countType, funcInfoHashMaskSymbol) if len(records) == 0 { tablePtr.SetInitializer(llvm.ConstPointerNull(tablePtr.GlobalValueType())) stringsPtr.SetInitializer(llvm.ConstPointerNull(stringsPtr.GlobalValueType())) + stringOffsetsPtr.SetInitializer(llvm.ConstPointerNull(stringOffsetsPtr.GlobalValueType())) hashPtr.SetInitializer(llvm.ConstPointerNull(hashPtr.GlobalValueType())) count.SetInitializer(llvm.ConstInt(countType, 0, false)) hashMask.SetInitializer(llvm.ConstInt(countType, 0, false)) @@ -157,11 +164,13 @@ func emitFuncInfoTable(ctx *context, pkg llssa.Package, records []funcInfoRecord values := make([]llvm.Value, 0, len(encoded.Records)) for _, rec := range encoded.Records { values = append(values, llvm.ConstNamedStruct(recordType, []llvm.Value{ - llvm.ConstInt(i32Type, uint64(rec.Symbol), false), - llvm.ConstInt(i32Type, uint64(rec.Name), false), - llvm.ConstInt(i32Type, uint64(rec.File), false), + llvm.ConstInt(i16Type, uint64(rec.SymbolPkg), false), + llvm.ConstInt(i16Type, uint64(rec.SymbolName), false), + llvm.ConstInt(i16Type, 
uint64(rec.NamePkg), false), + llvm.ConstInt(i16Type, uint64(rec.NameName), false), + llvm.ConstInt(i16Type, uint64(rec.FileRoot), false), + llvm.ConstInt(i16Type, uint64(rec.FileName), false), llvm.ConstInt(i32Type, uint64(rec.Line), false), - llvm.ConstInt(i32Type, uint64(rec.Column), false), })) } arrayType := llvm.ArrayType(recordType, len(values)) @@ -180,17 +189,17 @@ func emitFuncInfoTable(ctx *context, pkg llssa.Package, records []funcInfoRecord stringData.SetUnnamedAddr(true) stringData.SetAlignment(1) - hashValues := make([]llvm.Value, 0, len(encoded.Hash)) - for _, idx := range encoded.Hash { - hashValues = append(hashValues, llvm.ConstInt(i32Type, uint64(idx), false)) + stringOffsetValues := make([]llvm.Value, 0, len(encoded.StringOffsets)) + for _, off := range encoded.StringOffsets { + stringOffsetValues = append(stringOffsetValues, llvm.ConstInt(i32Type, uint64(off), false)) } - hashArrayType := llvm.ArrayType(i32Type, len(hashValues)) - hashData := llvm.AddGlobal(mod, hashArrayType, funcInfoHashDataSymbol) - hashData.SetInitializer(llvm.ConstArray(i32Type, hashValues)) - hashData.SetLinkage(llvm.PrivateLinkage) - hashData.SetGlobalConstant(true) - hashData.SetUnnamedAddr(true) - hashData.SetAlignment(4) + stringOffsetsArrayType := llvm.ArrayType(i32Type, len(stringOffsetValues)) + stringOffsetsData := llvm.AddGlobal(mod, stringOffsetsArrayType, funcInfoStringOffsetsDataSymbol) + stringOffsetsData.SetInitializer(llvm.ConstArray(i32Type, stringOffsetValues)) + stringOffsetsData.SetLinkage(llvm.PrivateLinkage) + stringOffsetsData.SetGlobalConstant(true) + stringOffsetsData.SetUnnamedAddr(true) + stringOffsetsData.SetAlignment(4) tablePtr.SetInitializer(llvm.ConstInBoundsGEP(arrayType, data, []llvm.Value{ llvm.ConstInt(countType, 0, false), @@ -200,12 +209,32 @@ func emitFuncInfoTable(ctx *context, pkg llssa.Package, records []funcInfoRecord llvm.ConstInt(countType, 0, false), llvm.ConstInt(countType, 0, false), })) - 
hashPtr.SetInitializer(llvm.ConstInBoundsGEP(hashArrayType, hashData, []llvm.Value{ + stringOffsetsPtr.SetInitializer(llvm.ConstInBoundsGEP(stringOffsetsArrayType, stringOffsetsData, []llvm.Value{ llvm.ConstInt(countType, 0, false), llvm.ConstInt(countType, 0, false), })) + if len(encoded.Hash) == 0 { + hashPtr.SetInitializer(llvm.ConstPointerNull(hashPtr.GlobalValueType())) + hashMask.SetInitializer(llvm.ConstInt(countType, 0, false)) + } else { + hashValues := make([]llvm.Value, 0, len(encoded.Hash)) + for _, idx := range encoded.Hash { + hashValues = append(hashValues, llvm.ConstInt(i16Type, uint64(idx), false)) + } + hashArrayType := llvm.ArrayType(i16Type, len(hashValues)) + hashData := llvm.AddGlobal(mod, hashArrayType, funcInfoHashDataSymbol) + hashData.SetInitializer(llvm.ConstArray(i16Type, hashValues)) + hashData.SetLinkage(llvm.PrivateLinkage) + hashData.SetGlobalConstant(true) + hashData.SetUnnamedAddr(true) + hashData.SetAlignment(2) + hashPtr.SetInitializer(llvm.ConstInBoundsGEP(hashArrayType, hashData, []llvm.Value{ + llvm.ConstInt(countType, 0, false), + llvm.ConstInt(countType, 0, false), + })) + hashMask.SetInitializer(llvm.ConstInt(countType, uint64(len(encoded.Hash)-1), false)) + } count.SetInitializer(llvm.ConstInt(countType, uint64(len(encoded.Records)), false)) - hashMask.SetInitializer(llvm.ConstInt(countType, uint64(len(encoded.Hash)-1), false)) } func toFuncInfoRecords(records []funcInfoRecord) []buildfuncinfo.Record { diff --git a/internal/build/funcinfo_table_test.go b/internal/build/funcinfo_table_test.go index f7367ebf56..649a20a8bc 100644 --- a/internal/build/funcinfo_table_test.go +++ b/internal/build/funcinfo_table_test.go @@ -56,17 +56,18 @@ func TestFuncInfoTableMaterializesMetadataWithoutFunctionPointers(t *testing.T) for _, want := range []string{ "@__llgo_funcinfo_table = global ptr", "@__llgo_funcinfo_strings = global ptr", + "@__llgo_funcinfo_string_offsets = global ptr", "@__llgo_funcinfo_hash = global ptr", 
"@__llgo_funcinfo_count = global i64 1", "@__llgo_funcinfo_hash_mask = global i64 1", - `@"__llgo_funcinfo_table$data" = private unnamed_addr constant [1 x { i32, i32, i32, i32, i32 }]`, - `@"__llgo_funcinfo_strings$data" = private unnamed_addr constant [47 x i8]`, - `@"__llgo_funcinfo_hash$data" = private unnamed_addr constant [2 x i32]`, - `example.com/p.live\00`, - `example.com/p.Live\00`, + `@"__llgo_funcinfo_table$data" = private unnamed_addr constant [1 x { i16, i16, i16, i16, i16, i16, i32 }]`, + `@"__llgo_funcinfo_string_offsets$data" = private unnamed_addr constant`, + `@"__llgo_funcinfo_hash$data" = private unnamed_addr constant [2 x i16]`, + `example.com/p\00`, + `live\00`, + `Live\00`, `live.go\00`, "i32 17", - "i32 3", } { if !strings.Contains(ir, want) { t.Fatalf("funcinfo table IR missing %q:\n%s", want, ir) @@ -138,6 +139,7 @@ func TestFuncInfoTableEmptyDefinitions(t *testing.T) { for _, want := range []string{ "@__llgo_funcinfo_table = global ptr null", "@__llgo_funcinfo_strings = global ptr null", + "@__llgo_funcinfo_string_offsets = global ptr null", "@__llgo_funcinfo_hash = global ptr null", "@__llgo_funcinfo_count = global i64 0", "@__llgo_funcinfo_hash_mask = global i64 0", diff --git a/runtime/internal/lib/runtime/extern.go b/runtime/internal/lib/runtime/extern.go index d6835b794f..66f95de36f 100644 --- a/runtime/internal/lib/runtime/extern.go +++ b/runtime/internal/lib/runtime/extern.go @@ -54,6 +54,7 @@ func callers(skip int, pc []uintptr) int { } pc[n] = fr.PC recordFrameSymbol(fr.PC, fr.Offset, fr.Name) + rtdebug.BindCallerLocation(fr.PC, fr.Name) n++ return true }) diff --git a/runtime/internal/lib/runtime/pprof_runtime_stub_llgo.go b/runtime/internal/lib/runtime/pprof_runtime_stub_llgo.go index bd131c9a25..8fac1ada4b 100644 --- a/runtime/internal/lib/runtime/pprof_runtime_stub_llgo.go +++ b/runtime/internal/lib/runtime/pprof_runtime_stub_llgo.go @@ -2,7 +2,10 @@ package runtime -import llrt 
"github.com/goplus/llgo/runtime/internal/runtime" +import ( + "github.com/goplus/llgo/runtime/internal/clite/tls" + llrt "github.com/goplus/llgo/runtime/internal/runtime" +) type StackRecord struct { Stack []uintptr @@ -84,10 +87,32 @@ func NumGoroutine() int { func SetCPUProfileRate(hz int) {} +const funcForPCCacheSize = 256 + +type funcForPCCacheEntry struct { + pc uintptr + fn *Func +} + +type funcForPCCache struct { + entries [funcForPCCacheSize]funcForPCCacheEntry +} + +var funcForPCCacheTLS = tls.Alloc[*funcForPCCache](nil) + func FuncForPC(pc uintptr) *Func { + if fn := cachedFuncForPC(pc); fn != nil { + return fn + } sym := frameSymbol(pc) + fn := newFuncForPC(pc, sym) + cacheFuncForPC(pc, fn) + return fn +} + +func newFuncForPC(pc uintptr, sym pcSymbol) *Func { if !sym.ok && sym.function == "" { - return &Func{entry: pc, name: unknownFunctionName(pc)} + return &Func{entry: pc, name: unknownFunctionName(pc), pc: pc} } name := sym.function if name == "" { @@ -97,5 +122,39 @@ func FuncForPC(pc uintptr) *Func { if entry == 0 { entry = pc } - return &Func{entry: entry, name: name} + return &Func{ + entry: entry, + name: name, + pc: pc, + file: sym.file, + line: sym.line, + } +} + +func cachedFuncForPC(pc uintptr) *Func { + cache := funcForPCCacheTLS.Get() + if cache == nil { + return nil + } + entry := &cache.entries[funcForPCCacheIndex(pc)] + if entry.pc == pc && entry.fn != nil { + return entry.fn + } + return nil +} + +func cacheFuncForPC(pc uintptr, fn *Func) { + cache := funcForPCCacheTLS.Get() + if cache == nil { + cache = new(funcForPCCache) + funcForPCCacheTLS.Set(cache) + } + cache.entries[funcForPCCacheIndex(pc)] = funcForPCCacheEntry{ + pc: pc, + fn: fn, + } +} + +func funcForPCCacheIndex(pc uintptr) uintptr { + return (pc >> 4) & (funcForPCCacheSize - 1) } diff --git a/runtime/internal/lib/runtime/symtab.go b/runtime/internal/lib/runtime/symtab.go index bd0a616018..919cd74ed3 100644 --- a/runtime/internal/lib/runtime/symtab.go +++ 
b/runtime/internal/lib/runtime/symtab.go @@ -135,11 +135,13 @@ func recordFrameSymbol(pc, offset uintptr, name string) { } type runtimeFuncInfoRecord struct { - symbol uint32 - name uint32 - file uint32 - line uint32 - column uint32 + symbolPkg uint16 + symbolName uint16 + namePkg uint16 + nameName uint16 + fileRoot uint16 + fileName uint16 + line uint32 } //go:linkname runtimeFuncInfoTable __llgo_funcinfo_table @@ -148,8 +150,11 @@ var runtimeFuncInfoTable *runtimeFuncInfoRecord //go:linkname runtimeFuncInfoStrings __llgo_funcinfo_strings var runtimeFuncInfoStrings *c.Char +//go:linkname runtimeFuncInfoStringOffsets __llgo_funcinfo_string_offsets +var runtimeFuncInfoStringOffsets *uint32 + //go:linkname runtimeFuncInfoHash __llgo_funcinfo_hash -var runtimeFuncInfoHash *uint32 +var runtimeFuncInfoHash *uint16 //go:linkname runtimeFuncInfoCount __llgo_funcinfo_count var runtimeFuncInfoCount uintptr @@ -180,10 +185,6 @@ func publicFunctionName(name string) string { return name } -func cStringEqual(cstr *c.Char, s string) bool { - return cStringCompare(cstr, s) == 0 -} - func cStringCompare(cstr *c.Char, s string) int { if cstr == nil { if s == "" { @@ -212,10 +213,37 @@ func cStringCompare(cstr *c.Char, s string) int { } } -func funcInfoCString(off uint32) *c.Char { - if runtimeFuncInfoStrings == nil { +func cStringLen(cstr *c.Char) int { + if cstr == nil { + return 0 + } + ptr := unsafe.Pointer(cstr) + for i := 0; ; i++ { + if *(*byte)(unsafe.Add(ptr, i)) == 0 { + return i + } + } +} + +func cStringAppend(dst []byte, cstr *c.Char) []byte { + if cstr == nil { + return dst + } + ptr := unsafe.Pointer(cstr) + for i := 0; ; i++ { + c := *(*byte)(unsafe.Add(ptr, i)) + if c == 0 { + return dst + } + dst = append(dst, c) + } +} + +func funcInfoCString(id uint16) *c.Char { + if runtimeFuncInfoStrings == nil || runtimeFuncInfoStringOffsets == nil { return nil } + off := *(*uint32)(unsafe.Add(unsafe.Pointer(runtimeFuncInfoStringOffsets), 
uintptr(id)*unsafe.Sizeof(*runtimeFuncInfoStringOffsets))) return (*c.Char)(unsafe.Add(unsafe.Pointer(runtimeFuncInfoStrings), uintptr(off))) } @@ -237,6 +265,58 @@ func funcInfoHashString(s string) uintptr { return uintptr(h) } +func funcInfoSymbolEqual(rec *runtimeFuncInfoRecord, symbol string) bool { + pkg := funcInfoCString(rec.symbolPkg) + name := funcInfoCString(rec.symbolName) + pkgLen := cStringLen(pkg) + nameLen := cStringLen(name) + if pkgLen == 0 { + return cStringCompare(name, symbol) == 0 + } + if len(symbol) != pkgLen+1+nameLen { + return false + } + if cStringCompare(pkg, symbol[:pkgLen]) != 0 || symbol[pkgLen] != '.' { + return false + } + return cStringCompare(name, symbol[pkgLen+1:]) == 0 +} + +func funcInfoJoinName(pkgID, nameID uint16) string { + pkg := funcInfoCString(pkgID) + name := funcInfoCString(nameID) + pkgLen := cStringLen(pkg) + nameLen := cStringLen(name) + if pkgLen == 0 { + return safeGoString(name, "") + } + if nameLen == 0 { + return safeGoString(pkg, "") + } + buf := make([]byte, 0, pkgLen+1+nameLen) + buf = cStringAppend(buf, pkg) + buf = append(buf, '.') + buf = cStringAppend(buf, name) + return string(buf) +} + +func funcInfoJoinFile(rootID, nameID uint16) string { + root := funcInfoCString(rootID) + name := funcInfoCString(nameID) + rootLen := cStringLen(root) + nameLen := cStringLen(name) + if rootLen == 0 { + return safeGoString(name, "") + } + if nameLen == 0 { + return safeGoString(root, "") + } + buf := make([]byte, 0, rootLen+nameLen) + buf = cStringAppend(buf, root) + buf = cStringAppend(buf, name) + return string(buf) +} + func funcInfoForSymbol(symbol string) *runtimeFuncInfoRecord { if symbol == "" || runtimeFuncInfoTable == nil || runtimeFuncInfoCount == 0 { return nil @@ -244,13 +324,13 @@ func funcInfoForSymbol(symbol string) *runtimeFuncInfoRecord { if runtimeFuncInfoHash != nil && runtimeFuncInfoHashMask != 0 { slot := funcInfoHashString(symbol) & runtimeFuncInfoHashMask for probes := uintptr(0); probes <= 
runtimeFuncInfoHashMask; probes++ { - idx := *(*uint32)(unsafe.Add(unsafe.Pointer(runtimeFuncInfoHash), slot*unsafe.Sizeof(*runtimeFuncInfoHash))) + idx := *(*uint16)(unsafe.Add(unsafe.Pointer(runtimeFuncInfoHash), slot*unsafe.Sizeof(*runtimeFuncInfoHash))) if idx == 0 { return nil } if uintptr(idx) <= runtimeFuncInfoCount { rec := funcInfoAt(uintptr(idx) - 1) - if cStringEqual(funcInfoCString(rec.symbol), symbol) { + if funcInfoSymbolEqual(rec, symbol) { return rec } } @@ -260,7 +340,7 @@ func funcInfoForSymbol(symbol string) *runtimeFuncInfoRecord { } for i := uintptr(0); i < runtimeFuncInfoCount; i++ { rec := funcInfoAt(i) - if cStringEqual(funcInfoCString(rec.symbol), symbol) { + if funcInfoSymbolEqual(rec, symbol) { return rec } } @@ -278,10 +358,10 @@ func applyFuncInfo(sym *pcSymbol, rawFunction string) { if rec == nil { return } - if name := safeGoString(funcInfoCString(rec.name), ""); name != "" { + if name := funcInfoJoinName(rec.namePkg, rec.nameName); name != "" { sym.function = publicFunctionName(name) } - if file := safeGoString(funcInfoCString(rec.file), ""); file != "" { + if file := funcInfoJoinFile(rec.fileRoot, rec.fileName); file != "" { if sym.file == "" { sym.file = file } @@ -392,7 +472,13 @@ func (ci *Frames) Next() (frame Frame, more bool) { } var f *Func if sym.entry != 0 || fn != "" { - f = &Func{entry: sym.entry, name: fn} + f = &Func{ + entry: sym.entry, + name: fn, + pc: pc, + file: sym.file, + line: sym.line, + } } ci.frames = append(ci.frames, Frame{ PC: pc, @@ -438,6 +524,9 @@ func CallersFrames(callers []uintptr) *Frames { type Func struct { entry uintptr name string + pc uintptr + file string + line int } func (f *Func) Name() string { @@ -455,6 +544,9 @@ func (f *Func) Entry() uintptr { } func (f *Func) FileLine(pc uintptr) (file string, line int) { + if f != nil && f.pc == pc && (f.file != "" || f.line != 0) { + return f.file, f.line + } sym := frameSymbol(pc) return sym.file, sym.line } diff --git 
a/runtime/internal/runtime/caller.go b/runtime/internal/runtime/caller.go index ba2a212fed..e4cbd3cf03 100644 --- a/runtime/internal/runtime/caller.go +++ b/runtime/internal/runtime/caller.go @@ -19,6 +19,7 @@ package runtime import ( "unsafe" + clitedebug "github.com/goplus/llgo/runtime/internal/clite/debug" "github.com/goplus/llgo/runtime/internal/clite/tls" ) @@ -31,35 +32,28 @@ type CallerFrame struct { StartLine int } +const callerLocationLimit = 4096 + const ( callerPCMask = uintptr(3) callerPCValue = uintptr(1) callersPCValue = uintptr(3) - callerPCRingSize = 1024 + callerPCHashInit = 64 ) -type callerPCStore struct { - next uintptr - frames [callerPCRingSize]CallerFrame +type callerLocationStore struct { + frames []CallerFrame + stack []CallerFrame + synthetic []CallerFrame + syntheticHash []uintptr } -var ( - callerFrameTLS = tls.Alloc[[]CallerFrame](nil) - callerPCStoreTLS = tls.Alloc[*callerPCStore](nil) - callerLookupTLS = tls.Alloc[bool](nil) - panicCallerFrameTLS = tls.Alloc[[]CallerFrame](nil) -) +var callerLocationTLS = tls.Alloc[*callerLocationStore](nil) -var ( - runtimeCallersFrame = CallerFrame{Function: "runtime.Callers"} - runtimeMainFrame = CallerFrame{Function: "runtime.main"} - runtimeGoexitFrame = CallerFrame{Function: "runtime.goexit"} -) - -func PushCallerFrame(entry uintptr, name, file string, startLine int) int { - frames := callerFrameTLS.Get() - mark := len(frames) - frames = append(frames, CallerFrame{ +func PushCallerLocationFrame(entry uintptr, name, file string, startLine int) int { + store := callerLocationStoreForThread() + mark := len(store.stack) + store.stack = append(store.stack, CallerFrame{ PC: entry, Entry: entry, Function: name, @@ -67,107 +61,114 @@ func PushCallerFrame(entry uintptr, name, file string, startLine int) int { Line: startLine, StartLine: startLine, }) - callerFrameTLS.Set(frames) return mark } -func SetCallerLine(line int) { - frames := callerFrameTLS.Get() - if line <= 0 || len(frames) == 0 { +func 
PopCallerLocationFrame(mark int) { + store := callerLocationTLS.Get() + if store == nil { return } - frames[len(frames)-1].Line = line - callerFrameTLS.Set(frames) -} - -func SetCallerLookupLine(line int) { - SetCallerLine(line) - callerLookupTLS.Set(true) -} - -func PopCallerFrame(mark int) { - frames := callerFrameTLS.Get() - oldLen := len(frames) + oldLen := len(store.stack) if mark < 0 || mark > oldLen { return } var zero CallerFrame for i := mark; i < oldLen; i++ { - frames[i] = zero + store.stack[i] = zero } - callerFrameTLS.Set(frames[:mark]) + store.stack = store.stack[:mark] +} - panicFrames := panicCallerFrameTLS.Get() - if len(panicFrames) > 0 && oldLen >= len(panicFrames) && mark <= len(panicFrames) { - for i := range panicFrames { - panicFrames[i] = zero - } - panicCallerFrameTLS.Clear() +func RecordCallerLocation(entry uintptr, name, file string, line int) { + if entry == 0 || line <= 0 { + return } + updateCurrentFrame(entry, name, file, line) + recordPCLocation(0, entry, name, file, line) } -func SavePanicCallerFrames() { - frames := callerFrameTLS.Get() - if len(frames) == 0 { - panicCallerFrameTLS.Clear() +func RecordPanicLocation(entry uintptr, name, file string, line int) { + if entry == 0 || line <= 0 { + return + } + updateCurrentFrame(entry, name, file, line) + recordPCLocation(0, entry, name, file, line) +} + +func updateCurrentFrame(entry uintptr, name, file string, line int) { + store := callerLocationTLS.Get() + if store == nil { return } - panicFrames := panicCallerFrameTLS.Get() - if cap(panicFrames) < len(frames) { - panicFrames = make([]CallerFrame, len(frames)) - } else { - panicFrames = panicFrames[:len(frames)] + for i := len(store.stack) - 1; i >= 0; i-- { + frame := &store.stack[i] + if frame.Entry == entry { + frame.Function = name + frame.File = file + frame.Line = line + return + } } - copy(panicFrames, frames) - panicCallerFrameTLS.Set(panicFrames) } -func Caller(skip int) (CallerFrame, bool) { - if !takeCallerLookup() { - 
return CallerFrame{}, false +func recordPCLocation(pc, entry uintptr, name, file string, line int) { + store := callerLocationStoreForThread() + for i := range store.frames { + frame := &store.frames[i] + if (pc != 0 && frame.PC == pc) || (pc == 0 && frame.PC == 0 && frame.Entry == entry) { + frame.PC = pc + frame.Entry = entry + frame.Function = name + frame.File = file + frame.Line = line + return + } + } + if len(store.frames) >= callerLocationLimit { + copy(store.frames, store.frames[1:]) + store.frames[len(store.frames)-1] = CallerFrame{} + store.frames = store.frames[:len(store.frames)-1] } + store.frames = append(store.frames, CallerFrame{ + PC: pc, + Entry: entry, + Function: name, + File: file, + Line: line, + }) +} + +func Caller(skip int) (CallerFrame, bool) { if skip < 0 { return CallerFrame{}, false } - frames := callerFrameTLS.Get() - panicFrames := panicCallerFrameTLS.Get() - if len(frames) == 0 { - if skip < len(panicFrames) { - return captureFrame(panicFrames[len(panicFrames)-1-skip], callerPCValue), true - } + store := callerLocationTLS.Get() + if store == nil || len(store.stack) == 0 { return CallerFrame{}, false } - if skip < len(frames) { - return captureFrame(frames[len(frames)-1-skip], callerPCValue), true + if skip < len(store.stack) { + return store.captureFrame(store.stack[len(store.stack)-1-skip], callerPCValue), true } - if len(panicFrames) > len(frames) { - idx := len(panicFrames) - 1 - skip - if idx >= 0 { - return captureFrame(panicFrames[idx], callerPCValue), true - } - } - switch skip - len(frames) { + switch skip - len(store.stack) { case 0: - return captureFrame(runtimeMainFrame, callerPCValue), true + return store.captureFrame(runtimeMainFrame, callerPCValue), true case 1: - return captureFrame(runtimeGoexitFrame, callerPCValue), true + return store.captureFrame(runtimeGoexitFrame, callerPCValue), true default: return CallerFrame{}, false } } func Callers(skip int, pcs []uintptr) int { - if !takeCallerLookup() { + if len(pcs) == 
0 { return 0 } if skip < 0 { skip = 0 } - frames := callerFrameTLS.Get() - if len(frames) == 0 { - frames = panicCallerFrameTLS.Get() - } - if len(frames) == 0 { + store := callerLocationTLS.Get() + if store == nil || len(store.stack) == 0 { return 0 } n := 0 @@ -179,15 +180,15 @@ func Callers(skip int, pcs []uintptr) int { if n >= len(pcs) { return false } - pcs[n] = captureFrame(frame, callersPCValue).PC + pcs[n] = store.captureFrame(frame, callersPCValue).PC n++ return true } if !add(runtimeCallersFrame) { return n } - for i := len(frames) - 1; i >= 0; i-- { - if !add(frames[i]) { + for i := len(store.stack) - 1; i >= 0; i-- { + if !add(store.stack[i]) { return n } } @@ -196,59 +197,234 @@ func Callers(skip int, pcs []uintptr) int { return n } -func takeCallerLookup() bool { - if !callerLookupTLS.Get() { - return false +func SavePanicCallerFrames() { +} + +func BindCallerLocation(pc uintptr, rawName string) { + store := callerLocationTLS.Get() + if store == nil || pc == 0 { + return + } + if frame, ok := callerLocationByName(store, rawName); ok { + bindCallerLocationPC(pc, frame) + return + } +} + +var ( + runtimeCallersFrame = CallerFrame{Function: "runtime.Callers"} + runtimeMainFrame = CallerFrame{Function: "runtime.main"} + runtimeGoexitFrame = CallerFrame{Function: "runtime.goexit"} +) + +func callerLocationByName(store *callerLocationStore, rawName string) (CallerFrame, bool) { + if rawName == "" { + return CallerFrame{}, false + } + name := normalizeRuntimeFuncName(rawName) + for i := len(store.frames) - 1; i >= 0; i-- { + frame := store.frames[i] + if frame.PC == 0 && frame.Function == name && frame.Line != 0 { + return frame, true + } + } + return CallerFrame{}, false +} + +func bindCallerLocationPC(pc uintptr, frame CallerFrame) { + recordPCLocation(pc, frame.Entry, frame.Function, frame.File, frame.Line) + if pc > 0 { + recordPCLocation(pc-1, frame.Entry, frame.Function, frame.File, frame.Line) } - callerLookupTLS.Set(false) - return true } func 
FrameForPC(pc uintptr) (CallerFrame, bool) { - if pc&callerPCMask == 0 { + if pc&callerPCMask != 0 { + if frame, ok := syntheticFrameForPC(pc); ok { + return frame, true + } + } + store := callerLocationTLS.Get() + if store == nil || pc == 0 { + return CallerFrame{}, false + } + for i := len(store.frames) - 1; i >= 0; i-- { + frame := store.frames[i] + if frame.PC == pc { + return frame, true + } + } + entry := entryForPC(pc) + if entry == 0 { return CallerFrame{}, false } - store := callerPCStoreTLS.Get() + var best CallerFrame + for _, frame := range store.frames { + if frame.PC == 0 || frame.PC > pc || frame.Entry != entry { + continue + } + if best.PC == 0 || frame.PC > best.PC { + best = frame + } + } + if best.PC != 0 { + best.PC = pc + return best, true + } + for i := len(store.frames) - 1; i >= 0; i-- { + frame := store.frames[i] + if frame.PC == 0 && frame.Entry == entry { + frame.PC = pc + return frame, true + } + } + return CallerFrame{}, false +} + +func syntheticFrameForPC(pc uintptr) (CallerFrame, bool) { + store := callerLocationTLS.Get() if store == nil { return CallerFrame{}, false } - addr := pc &^ callerPCMask - if !store.contains(addr) { + seq := pc >> 2 + if seq == 0 || seq > uintptr(len(store.synthetic)) { + return CallerFrame{}, false + } + frame := store.synthetic[seq-1] + if frame.PC>>2 != seq { return CallerFrame{}, false } - frame := *(*CallerFrame)(unsafe.Pointer(addr)) + frame.PC = pc + if frame.Entry == 0 { + frame.Entry = pc + } return frame, true } -func callerPCStoreForThread() *callerPCStore { - store := callerPCStoreTLS.Get() +func callerLocationStoreForThread() *callerLocationStore { + store := callerLocationTLS.Get() if store == nil { - store = new(callerPCStore) - callerPCStoreTLS.Set(store) + store = new(callerLocationStore) + callerLocationTLS.Set(store) } return store } -func captureFrame(frame CallerFrame, pcValue uintptr) CallerFrame { - store := callerPCStoreForThread() - idx := store.next & (callerPCRingSize - 1) - 
store.next++ - store.frames[idx] = frame - rec := &store.frames[idx] - pc := uintptr(unsafe.Pointer(rec)) | pcValue - rec.PC = pc +func (s *callerLocationStore) captureFrame(frame CallerFrame, pcValue uintptr) CallerFrame { + idx := s.internSyntheticFrame(frame) + rec := s.synthetic[idx] + seq := uintptr(idx + 1) + rec.PC = (seq << 2) | pcValue if rec.Entry == 0 { - rec.Entry = pc + rec.Entry = rec.PC + } + return rec +} + +func (s *callerLocationStore) internSyntheticFrame(frame CallerFrame) int { + if len(s.syntheticHash) == 0 { + s.syntheticHash = make([]uintptr, callerPCHashInit) + } + if len(s.synthetic)*2 >= len(s.syntheticHash) { + s.rehashSyntheticFrames(len(s.syntheticHash) * 2) + } + slot := s.syntheticSlot(frame) + for { + idx := s.syntheticHash[slot] + if idx == 0 { + frame.PC = (uintptr(len(s.synthetic)+1) << 2) | callerPCValue + s.synthetic = append(s.synthetic, frame) + s.syntheticHash[slot] = uintptr(len(s.synthetic)) + return len(s.synthetic) - 1 + } + existing := s.synthetic[idx-1] + if sameSyntheticFrame(existing, frame) { + return int(idx - 1) + } + slot = (slot + 1) & (uintptr(len(s.syntheticHash)) - 1) + } +} + +func (s *callerLocationStore) rehashSyntheticFrames(size int) { + old := s.syntheticHash + s.syntheticHash = make([]uintptr, size) + for _, idx := range old { + if idx == 0 { + continue + } + frame := s.synthetic[idx-1] + slot := s.syntheticSlot(frame) + for s.syntheticHash[slot] != 0 { + slot = (slot + 1) & (uintptr(len(s.syntheticHash)) - 1) + } + s.syntheticHash[slot] = idx + } +} + +func (s *callerLocationStore) syntheticSlot(frame CallerFrame) uintptr { + h := frame.Entry ^ (uintptr(frame.Line) << 12) ^ (uintptr(frame.StartLine) << 24) + h ^= uintptr(len(frame.Function)) << 4 + h ^= uintptr(len(frame.File)) << 8 + return h & (uintptr(len(s.syntheticHash)) - 1) +} + +func sameSyntheticFrame(a, b CallerFrame) bool { + return a.Entry == b.Entry && + a.Function == b.Function && + a.File == b.File && + a.Line == b.Line && + a.StartLine 
== b.StartLine +} + +func entryForPC(pc uintptr) uintptr { + var info clitedebug.Info + if clitedebug.Addrinfo(unsafe.Pointer(pc), &info) == 0 { + return 0 + } + return uintptr(info.Saddr) +} + +func normalizeRuntimeFuncName(name string) string { + const commandLineArguments = "command-line-arguments." + if hasPrefix(name, commandLineArguments) { + name = "main." + name[len(commandLineArguments):] } - return *rec + if len(name) > 0 && name[0] == '_' { + name = name[1:] + } + return normalizeRuntimeAnonFuncName(name) } -func (s *callerPCStore) contains(addr uintptr) bool { - start := uintptr(unsafe.Pointer(&s.frames[0])) - size := unsafe.Sizeof(s.frames) - end := start + size - if addr < start || addr >= end { +func normalizeRuntimeAnonFuncName(name string) string { + dollar := lastIndexByte(name, '$') + if dollar < 0 || dollar == len(name)-1 { + return name + } + for i := dollar + 1; i < len(name); i++ { + if name[i] < '0' || name[i] > '9' { + return name + } + } + return name[:dollar] + ".func" + name[dollar+1:] +} + +func hasPrefix(s, prefix string) bool { + if len(s) < len(prefix) { return false } - return (addr-start)%unsafe.Sizeof(s.frames[0]) == 0 + for i := 0; i < len(prefix); i++ { + if s[i] != prefix[i] { + return false + } + } + return true +} + +func lastIndexByte(s string, c byte) int { + for i := len(s) - 1; i >= 0; i-- { + if s[i] == c { + return i + } + } + return -1 } From a3115cd69c11fe214aeee474ae2d7e1abb9c88c4 Mon Sep 17 00:00:00 2001 From: Li Jie Date: Tue, 30 Jun 2026 13:47:42 +0800 Subject: [PATCH 06/59] test: cover indirect runtime caller paths --- cl/caller_frame_test.go | 10 +++++- test/go/runtime_statement_line_test.go | 49 ++++++++++++++++++++++++++ 2 files changed, 58 insertions(+), 1 deletion(-) diff --git a/cl/caller_frame_test.go b/cl/caller_frame_test.go index 4239a062a9..b53fe93d97 100644 --- a/cl/caller_frame_test.go +++ b/cl/caller_frame_test.go @@ -134,10 +134,18 @@ func TestRuntimeCallerPackageDetection(t *testing.T) { import 
"runtime" import "runtime/debug" +type callerIface interface { Call() } +type callerImpl struct{} + func direct() { runtime.Caller(0) } func indirect() { direct() } func dynamic(f func()) { f() } func dynamicCaller() { dynamic(direct) } +func (callerImpl) Call() { direct() } +func interfaceDispatch(c callerIface) { c.Call() } +func interfaceCaller(c callerIface) { interfaceDispatch(c) } +func closureLayer(next func()) func() { return func() { next() } } +func closureCaller() { closureLayer(closureLayer(direct))() } func stack() { _ = debug.Stack() } func anonOnly() { func() { runtime.FuncForPC(0) }() } func plain() {} @@ -161,7 +169,7 @@ func plain() {} t.Fatal("plain function should not report runtime caller usage") } runtimeCallerFuncs := runtimeCallerFuncSet(ssapkg) - for _, name := range []string{"dynamic", "dynamicCaller"} { + for _, name := range []string{"dynamic", "dynamicCaller", "interfaceDispatch", "interfaceCaller", "closureLayer", "closureCaller"} { if !runtimeCallerFuncs[ssapkg.Func(name)] { t.Fatalf("%s should be tracked because dynamic calls may reach runtime stack APIs", name) } diff --git a/test/go/runtime_statement_line_test.go b/test/go/runtime_statement_line_test.go index d534b36dc8..81430b49f0 100644 --- a/test/go/runtime_statement_line_test.go +++ b/test/go/runtime_statement_line_test.go @@ -45,6 +45,8 @@ func (w Wrapper) Get(i int) int { func main() { checkCallerStatement() checkCallersFramesStatement() + checkInterfaceIndirectCaller() + checkClosureIndirectCaller() checkAdjacentRuntimeStack() checkRecoveredDebugStackBounds() } @@ -89,6 +91,51 @@ func checkCallersFramesStatement() { panic("missing callers frame") } +type indirectCaller interface { + call() +} + +type indirectCallerImpl struct{} + +//go:noinline +func checkInterfaceIndirectCaller() { + var c indirectCaller = indirectCallerImpl{} + c.call() // INTERFACE_CALL_MARK +} + +//go:noinline +func (indirectCallerImpl) call() { + interfaceMiddle() +} + +//go:noinline +func 
interfaceMiddle() { + checkCallerLine("interface", 2, INTERFACE_CALL_LINE) +} + +//go:noinline +func checkClosureIndirectCaller() { + f := closureLayer(closureLayer(func() { + checkCallerLine("closure", 3, CLOSURE_CALL_LINE) + })) + f() // CLOSURE_CALL_MARK +} + +//go:noinline +func closureLayer(next func()) func() { + return func() { + next() + } +} + +//go:noinline +func checkCallerLine(kind string, skip, want int) { + _, file, line, ok := runtime.Caller(skip) + if !ok || !strings.HasSuffix(file, "main.go") || line != want { + panic("bad " + kind + " indirect caller line: " + file + ":" + strconv.Itoa(line)) + } +} + //go:noinline func checkAdjacentRuntimeStack() { var buf1, buf2 [4096]byte @@ -142,6 +189,8 @@ func TestRuntimeStatementLineInfo(t *testing.T) { source := runtimeStatementLineProbe source = strings.ReplaceAll(source, "CALLER_STMT_LINE", strconv.Itoa(markerLine(source, "CALLER_STMT_MARK"))) source = strings.ReplaceAll(source, "CALLERS_STMT_LINE", strconv.Itoa(markerLine(source, "CALLERS_STMT_MARK"))) + source = strings.ReplaceAll(source, "INTERFACE_CALL_LINE", strconv.Itoa(markerLine(source, "INTERFACE_CALL_MARK"))) + source = strings.ReplaceAll(source, "CLOSURE_CALL_LINE", strconv.Itoa(markerLine(source, "CLOSURE_CALL_MARK"))) source = strings.ReplaceAll(source, "STACK_ONE_LINE", strconv.Itoa(markerLine(source, "STACK_ONE_MARK"))) source = strings.ReplaceAll(source, "STACK_TWO_LINE", strconv.Itoa(markerLine(source, "STACK_TWO_MARK"))) source = strings.ReplaceAll(source, "BOUNDS_LINE", strconv.Itoa(markerLine(source, "BOUNDS_MARK"))) From c6c128a2a453f6f15abb871c294a38344c245e8c Mon Sep 17 00:00:00 2001 From: Li Jie Date: Tue, 30 Jun 2026 19:18:19 +0800 Subject: [PATCH 07/59] cl: narrow runtime caller tracking --- cl/caller_frame_test.go | 13 ++ cl/instr.go | 356 +++++++++++++++++++++++++++++++++++----- 2 files changed, 330 insertions(+), 39 deletions(-) diff --git a/cl/caller_frame_test.go b/cl/caller_frame_test.go index b53fe93d97..2a4c6eda4b 100644 
--- a/cl/caller_frame_test.go +++ b/cl/caller_frame_test.go @@ -136,6 +136,8 @@ import "runtime/debug" type callerIface interface { Call() } type callerImpl struct{} +type workerIface interface { Work() } +type workerImpl struct{} func direct() { runtime.Caller(0) } func indirect() { direct() } @@ -148,6 +150,12 @@ func closureLayer(next func()) func() { return func() { next() } } func closureCaller() { closureLayer(closureLayer(direct))() } func stack() { _ = debug.Stack() } func anonOnly() { func() { runtime.FuncForPC(0) }() } +func leaf() {} +func callFunc(f func()) { f() } +func callFuncHot() { callFunc(leaf) } +func (workerImpl) Work() {} +func callWorker(w workerIface) { w.Work() } +func workerHot() { var w workerIface = workerImpl{}; callWorker(w) } func plain() {} `) if !packageUsesRuntimeCaller(ssapkg) { @@ -174,6 +182,11 @@ func plain() {} t.Fatalf("%s should be tracked because dynamic calls may reach runtime stack APIs", name) } } + for _, name := range []string{"leaf", "callFunc", "callFuncHot", "callWorker", "workerHot"} { + if runtimeCallerFuncs[ssapkg.Func(name)] { + t.Fatalf("%s should not be tracked when resolved dynamic targets do not reach runtime stack APIs", name) + } + } if runtimeCallerFuncs[ssapkg.Func("plain")] { t.Fatal("plain function should not be tracked") } diff --git a/cl/instr.go b/cl/instr.go index 357107167b..2a87f99334 100644 --- a/cl/instr.go +++ b/cl/instr.go @@ -882,28 +882,41 @@ func packageUsesRuntimeCaller(pkg *ssa.Package) bool { } func fnUsesRuntimeCaller(fn *ssa.Function) bool { - return runtimeCallerFuncSetFor(fn, make(map[*ssa.Function]bool), make(map[*ssa.Function]bool), false) + if fn == nil { + return false + } + if fn.Pkg == nil { + return fnHasDirectRuntimeCaller(fn) + } + return runtimeCallerFuncSet(fn.Pkg)[fn] } func runtimeCallerFuncSet(pkg *ssa.Package) map[*ssa.Function]bool { if pkg == nil { return nil } - dynamicCallsMayReachRuntimeCaller := packageHasDirectRuntimeCaller(pkg) - if 
!dynamicCallsMayReachRuntimeCaller { - return nil + funcs, trackable := collectRuntimeCallerFunctions(pkg) + analysis := &runtimeCallerAnalysis{ + pkg: pkg, + funcs: funcs, + trackable: trackable, + callsites: collectRuntimeCallerCallsites(funcs), + memo: make(map[*ssa.Function]bool), + visiting: make(map[*ssa.Function]bool), } - memo := make(map[*ssa.Function]bool) - visiting := make(map[*ssa.Function]bool) - for _, member := range pkg.Members { - if fn, ok := member.(*ssa.Function); ok { - runtimeCallerFuncSetFor(fn, memo, visiting, dynamicCallsMayReachRuntimeCaller) - } + if !analysis.packageHasRuntimeCaller() { + return nil } out := make(map[*ssa.Function]bool) - for fn, ok := range memo { - if ok { - out[fn] = true + for { + ntrack := len(trackable) + for fn := range trackable { + if analysis.fnMayReachRuntimeCaller(fn) { + out[fn] = true + } + } + if len(trackable) == ntrack { + break } } if len(out) == 0 { @@ -912,9 +925,105 @@ func runtimeCallerFuncSet(pkg *ssa.Package) map[*ssa.Function]bool { return out } -func packageHasDirectRuntimeCaller(pkg *ssa.Package) bool { +type runtimeCallerAnalysis struct { + pkg *ssa.Package + funcs map[*ssa.Function]bool + trackable map[*ssa.Function]bool + callsites map[*ssa.Function][]*ssa.CallCommon + memo map[*ssa.Function]bool + visiting map[*ssa.Function]bool +} + +func collectRuntimeCallerFunctions(pkg *ssa.Package) (funcs, trackable map[*ssa.Function]bool) { + funcs = make(map[*ssa.Function]bool) + trackable = make(map[*ssa.Function]bool) + var add func(*ssa.Function, bool) bool + add = func(fn *ssa.Function, track bool) bool { + if fn == nil || !functionBelongsToPackage(pkg, fn) { + return false + } + if track { + trackable[fn] = true + } + if funcs[fn] { + return false + } + funcs[fn] = true + for _, anon := range fn.AnonFuncs { + add(anon, false) + } + return true + } for _, member := range pkg.Members { - if fn, ok := member.(*ssa.Function); ok && fnHasDirectRuntimeCaller(fn) { + if fn, ok := 
member.(*ssa.Function); ok { + add(fn, true) + } + } + if pkg.Prog != nil && pkg.Pkg != nil { + for _, typ := range pkg.Prog.RuntimeTypes() { + if !typeBelongsToPackage(typ, pkg.Pkg) { + continue + } + methods := pkg.Prog.MethodSets.MethodSet(typ) + for i := 0; i < methods.Len(); i++ { + add(pkg.Prog.MethodValue(methods.At(i)), false) + } + } + } + for changed := true; changed; { + changed = false + for fn := range funcs { + forEachCall(fn, func(call *ssa.CallCommon) { + if add(call.StaticCallee(), trackable[fn]) { + changed = true + } + }) + } + } + return funcs, trackable +} + +func collectRuntimeCallerCallsites(funcs map[*ssa.Function]bool) map[*ssa.Function][]*ssa.CallCommon { + callsites := make(map[*ssa.Function][]*ssa.CallCommon) + for fn := range funcs { + forEachCall(fn, func(call *ssa.CallCommon) { + callee := call.StaticCallee() + if funcs[callee] { + callsites[callee] = append(callsites[callee], call) + } + }) + } + return callsites +} + +func functionBelongsToPackage(pkg *ssa.Package, fn *ssa.Function) bool { + if pkg == nil || fn == nil { + return false + } + if fn.Pkg == pkg { + return true + } + return fn.Pkg == nil && fn.Parent() != nil && functionBelongsToPackage(pkg, fn.Parent()) +} + +func typeBelongsToPackage(typ types.Type, pkg *types.Package) bool { + if pkg == nil { + return false + } + for { + if ptr, ok := types.Unalias(typ).(*types.Pointer); ok { + typ = ptr.Elem() + continue + } + break + } + named, ok := types.Unalias(typ).(*types.Named) + return ok && named.Obj() != nil && named.Obj().Pkg() == pkg +} + +func (a *runtimeCallerAnalysis) packageHasRuntimeCaller() bool { + for fn := range a.funcs { + if fnHasDirectRuntimeCaller(fn) { return true } } @@ -944,46 +1053,215 @@ func fnHasDirectRuntimeCaller(fn *ssa.Function) bool { return false } -func runtimeCallerFuncSetFor(fn *ssa.Function, memo, visiting map[*ssa.Function]bool, dynamicCallsMayReachRuntimeCaller bool) bool { +func (a *runtimeCallerAnalysis) fnMayReachRuntimeCaller(fn 
*ssa.Function) bool { if fn == nil { return false } - if ok, done := memo[fn]; done { + if isRuntimeCallerFunc(fn) { + return true + } + if !a.funcs[fn] { + return false + } + if ok, done := a.memo[fn]; done { return ok } - if visiting[fn] { + if a.visiting[fn] { return false } - visiting[fn] = true - defer delete(visiting, fn) - for _, block := range fn.Blocks { - for _, instr := range block.Instrs { - call, ok := instr.(ssa.CallInstruction) - if !ok { - continue - } - callee := call.Common().StaticCallee() - if callee == nil && dynamicCallsMayReachRuntimeCaller { - memo[fn] = true - return true - } - if isRuntimeCallerFunc(callee) || - (callee != nil && callee.Pkg == fn.Pkg && runtimeCallerFuncSetFor(callee, memo, visiting, dynamicCallsMayReachRuntimeCaller)) { - memo[fn] = true - return true + a.visiting[fn] = true + defer delete(a.visiting, fn) + reaches := false + forEachCall(fn, func(call *ssa.CallCommon) { + if reaches { + return + } + callee := call.StaticCallee() + switch { + case isRuntimeCallerFunc(callee): + reaches = true + case callee != nil: + reaches = a.fnMayReachRuntimeCaller(callee) + case call.Method != nil: + reaches = a.interfaceInvokeMayReachRuntimeCaller(fn, call) + default: + reaches = a.functionValueCallMayReachRuntimeCaller(fn, call.Value) + } + }) + if !reaches { + for _, anon := range fn.AnonFuncs { + if a.fnMayReachRuntimeCaller(anon) { + if a.trackable[fn] { + a.trackable[anon] = true + } + reaches = true + break } } } - for _, anon := range fn.AnonFuncs { - if runtimeCallerFuncSetFor(anon, memo, visiting, dynamicCallsMayReachRuntimeCaller) { - memo[fn] = true + a.memo[fn] = reaches + return reaches +} + +func (a *runtimeCallerAnalysis) functionValueCallMayReachRuntimeCaller(fn *ssa.Function, value ssa.Value) bool { + targets, ok := a.functionValueTargets(fn, value) + if !ok { + return true + } + for target := range targets { + if a.fnMayReachRuntimeCaller(target) { + return true + } + } + return false +} + +func (a 
*runtimeCallerAnalysis) functionValueTargets(fn *ssa.Function, value ssa.Value) (map[*ssa.Function]bool, bool) { + if targets, ok := staticFunctionTargets(value); ok { + return targets, true + } + param, ok := value.(*ssa.Parameter) + if !ok || param.Parent() != fn { + return nil, false + } + idx, ok := parameterIndex(fn, param) + if !ok { + return nil, false + } + return a.functionParamTargets(fn, idx) +} + +func (a *runtimeCallerAnalysis) functionParamTargets(fn *ssa.Function, idx int) (map[*ssa.Function]bool, bool) { + callsites := a.callsites[fn] + if len(callsites) == 0 { + return nil, false + } + targets := make(map[*ssa.Function]bool) + for _, call := range callsites { + args := call.Args + if idx >= len(args) { + return nil, false + } + argTargets, ok := staticFunctionTargets(args[idx]) + if !ok { + return nil, false + } + for target := range argTargets { + targets[target] = true + } + } + return targets, true +} + +func staticFunctionTargets(value ssa.Value) (map[*ssa.Function]bool, bool) { + switch v := value.(type) { + case *ssa.Function: + return map[*ssa.Function]bool{v: true}, true + case *ssa.MakeClosure: + if fn, ok := v.Fn.(*ssa.Function); ok { + return map[*ssa.Function]bool{fn: true}, true + } + } + return nil, false +} + +func (a *runtimeCallerAnalysis) interfaceInvokeMayReachRuntimeCaller(fn *ssa.Function, call *ssa.CallCommon) bool { + targets, ok := a.interfaceMethodTargets(fn, call.Value, call.Method) + if !ok { + return true + } + for target := range targets { + if a.fnMayReachRuntimeCaller(target) { return true } } - memo[fn] = false return false } +func (a *runtimeCallerAnalysis) interfaceMethodTargets(fn *ssa.Function, value ssa.Value, method *types.Func) (map[*ssa.Function]bool, bool) { + if targets, ok := a.staticInterfaceMethodTargets(value, method); ok { + return targets, true + } + param, ok := value.(*ssa.Parameter) + if !ok || param.Parent() != fn { + return nil, false + } + idx, ok := parameterIndex(fn, param) + if !ok { + return 
nil, false + } + callsites := a.callsites[fn] + if len(callsites) == 0 { + return nil, false + } + targets := make(map[*ssa.Function]bool) + for _, call := range callsites { + args := call.Args + if idx >= len(args) { + return nil, false + } + argTargets, ok := a.staticInterfaceMethodTargets(args[idx], method) + if !ok { + return nil, false + } + for target := range argTargets { + targets[target] = true + } + } + return targets, true +} + +func (a *runtimeCallerAnalysis) staticInterfaceMethodTargets(value ssa.Value, method *types.Func) (map[*ssa.Function]bool, bool) { + switch v := value.(type) { + case *ssa.MakeInterface: + return a.methodTargetsForType(v.X.Type(), method) + case *ssa.ChangeInterface: + return a.staticInterfaceMethodTargets(v.X, method) + } + return nil, false +} + +func (a *runtimeCallerAnalysis) methodTargetsForType(typ types.Type, method *types.Func) (map[*ssa.Function]bool, bool) { + if a.pkg == nil || a.pkg.Prog == nil || method == nil { + return nil, false + } + methods := a.pkg.Prog.MethodSets.MethodSet(typ) + for i := 0; i < methods.Len(); i++ { + sel := methods.At(i) + if sel.Obj().Name() != method.Name() { + continue + } + fn := a.pkg.Prog.MethodValue(sel) + if fn == nil { + return nil, false + } + return map[*ssa.Function]bool{fn: true}, true + } + return nil, false +} + +func parameterIndex(fn *ssa.Function, param *ssa.Parameter) (int, bool) { + for i, candidate := range fn.Params { + if candidate == param { + return i, true + } + } + return 0, false +} + +func forEachCall(fn *ssa.Function, do func(*ssa.CallCommon)) { + if fn == nil { + return + } + for _, block := range fn.Blocks { + for _, instr := range block.Instrs { + if call, ok := instr.(ssa.CallInstruction); ok { + do(call.Common()) + } + } + } +} + func isRuntimeCallerFunc(fn *ssa.Function) bool { if fn == nil || fn.Pkg == nil || fn.Pkg.Pkg == nil { return false From 4692e15b823d23f69dc459a9f811c54effcfc9f2 Mon Sep 17 00:00:00 2001 From: Li Jie Date: Tue, 30 Jun 2026 21:44:48 
+0800 Subject: [PATCH 08/59] runtime: add compact pc-line funcinfo table --- cl/caller_frame_test.go | 95 ++++++ cl/compile.go | 16 +- cl/instr.go | 108 +++++++ internal/build/build.go | 5 +- internal/build/funcinfo/funcinfo.go | 68 ++++- internal/build/funcinfo/funcinfo_test.go | 30 ++ internal/build/funcinfo_table.go | 176 ++++++++++- internal/build/funcinfo_table_test.go | 60 ++++ internal/build/main_module.go | 3 +- runtime/internal/clite/debug/_wrap/debug.c | 7 + runtime/internal/clite/debug/debug.go | 3 + runtime/internal/lib/runtime/symtab.go | 339 ++++++++++++++++++++- ssa/funcinfo.go | 30 ++ 13 files changed, 919 insertions(+), 21 deletions(-) diff --git a/cl/caller_frame_test.go b/cl/caller_frame_test.go index 2a4c6eda4b..d3b53cf582 100644 --- a/cl/caller_frame_test.go +++ b/cl/caller_frame_test.go @@ -338,6 +338,101 @@ func f() { } } +func TestCompileRuntimeCallerPCLineMetadata(t *testing.T) { + ssapkg, files := buildCallerFrameSSAPackage(t, "example.com/foo", `package foo +import "runtime" + +func top() { + runtime.Caller(0) + leaf() +} + +func leaf() {} +`) + prog := newLLSSAProg(t) + prog.Target().GOOS = "linux" + prog.Target().GOARCH = "amd64" + prog.EnableFuncInfoMetadata(true) + pkg, err := NewPackage(prog, ssapkg, files) + if err != nil { + t.Fatal(err) + } + ir := pkg.Module().String() + for _, want := range []string{ + `!llgo.pcline = !{!`, + `!"example.com/foo.top"`, + `!"caller_frame_compile.go"`, + "__llgo_pcsite_", + `.pushsection llgo_pcline`, + `.quad __llgo_pcsite_`, + } { + if !strings.Contains(ir, want) { + t.Fatalf("missing pcline metadata %s:\n%s", want, ir) + } + } + for _, line := range strings.Split(ir, "\n") { + if strings.Contains(line, "!llgo.pcline") || strings.Contains(line, `!"example.com/foo.top"`) { + if strings.Contains(line, `ptr @`) { + t.Fatalf("pcline metadata should use symbol strings, not function pointers:\n%s", line) + } + } + } +} + +func TestCompileRuntimeCallerPCLineEscapesDollarInInlineAsm(t *testing.T) { + 
ssapkg, files := buildCallerFrameSSAPackage(t, "example.com/foo", `package foo +import "runtime" + +func top() { + func() { + runtime.Caller(0) + }() +} +`) + prog := newLLSSAProg(t) + prog.Target().GOOS = "linux" + prog.Target().GOARCH = "amd64" + prog.EnableFuncInfoMetadata(true) + pkg, err := NewPackage(prog, ssapkg, files) + if err != nil { + t.Fatal(err) + } + ir := pkg.Module().String() + if !strings.Contains(ir, `!"example.com/foo.top$1"`) { + t.Fatalf("metadata should keep the original Go symbol name:\n%s", ir) + } + if !strings.Contains(ir, `example.com/foo.top$$1`) { + t.Fatalf("inline asm should escape $ in the associated symbol:\n%s", ir) + } + for _, line := range strings.Split(ir, "\n") { + if strings.Contains(line, `.pushsection llgo_pcline`) && strings.Contains(line, `example.com/foo.top$1`) && !strings.Contains(line, `example.com/foo.top$$1`) { + t.Fatalf("inline asm has an unescaped $ operand:\n%s", line) + } + } +} + +func TestCompileRuntimeCallerPCLineMetadataSkippedOnDarwin(t *testing.T) { + ssapkg, files := buildCallerFrameSSAPackage(t, "example.com/foo", `package foo +import "runtime" + +func top() { + runtime.Caller(0) +} +`) + prog := newLLSSAProg(t) + prog.Target().GOOS = "darwin" + prog.Target().GOARCH = "arm64" + prog.EnableFuncInfoMetadata(true) + pkg, err := NewPackage(prog, ssapkg, files) + if err != nil { + t.Fatal(err) + } + ir := pkg.Module().String() + if strings.Contains(ir, `!llgo.pcline`) || strings.Contains(ir, "__llgo_pcsite_") { + t.Fatalf("darwin should not emit inline asm pc-site labels:\n%s", ir) + } +} + func TestCompileRuntimeCallerFrameUsesGoNameForLinkname(t *testing.T) { ssapkg, files := buildCallerFrameSSAPackage(t, "command-line-arguments", `package main import "runtime" diff --git a/cl/compile.go b/cl/compile.go index 20531efc4d..443a0cc9cb 100644 --- a/cl/compile.go +++ b/cl/compile.go @@ -177,6 +177,7 @@ type context struct { anonDefers map[*ssa.Function]bool paramDIVars map[*types.Var]llssa.DIVar 
runtimeCallerFuncs map[*ssa.Function]bool + pcLineSeq uint64 patches Patches blkInfos []blocks.Info @@ -549,10 +550,11 @@ func (p *context) compileFuncDecl(pkg llssa.Package, f *ssa.Function) (llssa.Fun } noInlineDirective := hasNoInlineDirective(f) runtimeStackNoInline := needsRuntimeStackNoInline(pkgTypes, f) - if disableInline || noInlineDirective || runtimeStackNoInline { + pcLineNoInline := p.needsPCLineNoInline(f) + if disableInline || noInlineDirective || runtimeStackNoInline || pcLineNoInline { fn.Inline(llssa.NoInline) } - if noInlineDirective || runtimeStackNoInline { + if noInlineDirective || runtimeStackNoInline || pcLineNoInline { fn.DisableTailCalls() } p.funcs[f] = fn @@ -680,6 +682,16 @@ func needsRuntimeStackNoInline(pkg *types.Package, f *ssa.Function) bool { return false } +func (p *context) needsPCLineNoInline(f *ssa.Function) bool { + if p == nil || f == nil || !p.prog.FuncInfoMetadataEnabled() || !p.trackCallerFrames || !p.runtimeCallerFuncs[f] { + return false + } + if !canEmitPCLineLabelsForTarget(p.prog.Target()) { + return false + } + return p.pkg != nil && canTrackCallerFramesForPackage(p.pkg.Path()) +} + func (p *context) getFuncBodyPos(f *ssa.Function) token.Position { if f.Object() != nil { if fn, ok := f.Object().(*types.Func); ok && fn.Scope() != nil { diff --git a/cl/instr.go b/cl/instr.go index 2a87f99334..be19636a60 100644 --- a/cl/instr.go +++ b/cl/instr.go @@ -1366,6 +1366,113 @@ func (p *context) recordCallerLocationForCall(b llssa.Builder, call *ssa.CallCom p.recordPanicLocation(b, call.Pos()) } +func (p *context) emitPCLineLabel(b llssa.Builder, pos token.Pos) { + if p == nil || p.pkg == nil || p.fn == nil || !p.prog.FuncInfoMetadataEnabled() || !p.shouldTrackCallerFrames() { + return + } + target := p.prog.Target() + if !canEmitPCLineLabelsForTarget(target) { + return + } + position := p.fset.Position(pos) + if position.Line <= 0 || position.Filename == "" { + return + } + p.pcLineSeq++ + id := pcLineID(p.fn.Name(), 
p.pcLineSeq) + label := pcLineLabelName(id) + ptrDirective := ".quad" + align := "3" + if p.prog.PointerSize() == 4 { + ptrDirective = ".long" + align = "2" + } + b.InlineAsm( + label + ":\n" + + ".pushsection llgo_pcline,\"ao\",@progbits," + asmQuoteSymbol(p.fn.Name()) + "\n" + + ".p2align " + align + "\n" + + ptrDirective + " " + label + "\n" + + ".quad " + uint64Hex(id) + "\n" + + ".popsection", + ) + p.pkg.EmitPCLineInfo(id, p.fn.Name(), position.Filename, position.Line, position.Column) +} + +func canEmitPCLineLabelsForTarget(target *llssa.Target) bool { + if target == nil { + return false + } + if target.Target != "" || target.GOARCH == "wasm" { + return false + } + // This path uses ELF SHF_LINK_ORDER section syntax. Darwin needs a Mach-O + // live_support section path, and other object formats need separate support. + return target.GOOS == "linux" +} + +func pcLineID(symbol string, seq uint64) uint64 { + const ( + offset = uint64(14695981039346656037) + prime = uint64(1099511628211) + ) + h := offset + for i := 0; i < len(symbol); i++ { + h ^= uint64(symbol[i]) + h *= prime + } + for i := 0; i < 8; i++ { + h ^= byteOfUint64(seq, uint(i*8)) + h *= prime + } + if h == 0 { + return 1 + } + return h +} + +func byteOfUint64(v uint64, shift uint) uint64 { + return (v >> shift) & 0xff +} + +func pcLineLabelName(id uint64) string { + const hexdigits = "0123456789abcdef" + var buf [16]byte + for i := len(buf) - 1; i >= 0; i-- { + buf[i] = hexdigits[id&0xf] + id >>= 4 + } + return "__llgo_pcsite_" + string(buf[:]) +} + +func uint64Hex(v uint64) string { + const hexdigits = "0123456789abcdef" + var buf [18]byte + buf[0] = '0' + buf[1] = 'x' + for i := len(buf) - 1; i >= 2; i-- { + buf[i] = hexdigits[v&0xf] + v >>= 4 + } + return string(buf[:]) +} + +func asmQuoteSymbol(symbol string) string { + var b strings.Builder + b.Grow(len(symbol) + 2) + b.WriteByte('"') + for i := 0; i < len(symbol); i++ { + switch symbol[i] { + case '\\', '"': + b.WriteByte('\\') + case '$': + 
b.WriteByte('$') + } + b.WriteByte(symbol[i]) + } + b.WriteByte('"') + return b.String() +} + func (p *context) popCallerLocationFrame(b llssa.Builder) { if p.callerFrameMark.IsNil() { return @@ -1634,6 +1741,7 @@ func collectMethodNilDerefChecks(fn *ssa.Function) map[*ssa.UnOp]none { func (p *context) callEx(b llssa.Builder, act llssa.DoAction, call *ssa.CallCommon, ds *explicitDeferStack) (ret llssa.Expr) { p.recordCallerLocationForCall(b, call) + p.emitPCLineLabel(b, call.Pos()) cv := call.Value if mthd := call.Method; mthd != nil { reflectCheck := p.reflectTypeMethodCheck(call, mthd) diff --git a/internal/build/build.go b/internal/build/build.go index 93041b7406..ea58216d85 100644 --- a/internal/build/build.go +++ b/internal/build/build.go @@ -1044,6 +1044,8 @@ func linkMainPkg(ctx *context, pkg *packages.Package, pkgs []*aPackage, outputPa // Generate main module file (needed for global variables even in library modes) // This is compiled directly to .o and added to linkInputs (not cached) // Use a stable synthetic name to avoid confusing it with the real main package in traces/logs. 
+ funcInfo := prepareFuncInfoTableRecords(collectFuncInfo(linkedOrder), nil) + pcLineInfo := collectPCLineInfo(linkedOrder) entryPkg := genMainModule(ctx, llssa.PkgRuntime, pkg, &genConfig{ rtInit: needRuntime, pyInit: needPyInit, @@ -1051,7 +1053,8 @@ func linkMainPkg(ctx *context, pkg *packages.Package, pkgs []*aPackage, outputPa methodByIndex: methodByIndex, methodByName: methodByName, abiSymbols: linkedModuleGlobals(linkedOrder), - funcInfo: prepareFuncInfoTableRecords(collectFuncInfo(linkedOrder), nil), + funcInfo: funcInfo, + pcLineInfo: pcLineInfo, }) entryObjFile, err := exportObject(ctx, "entry_main", entryPkg.ExportFile, entryPkg.LPkg) if err != nil { diff --git a/internal/build/funcinfo/funcinfo.go b/internal/build/funcinfo/funcinfo.go index c6043484c6..10dc6aab6c 100644 --- a/internal/build/funcinfo/funcinfo.go +++ b/internal/build/funcinfo/funcinfo.go @@ -31,6 +31,14 @@ type Record struct { Column uint32 } +type PCLineRecord struct { + ID uint64 + Symbol string + File string + Line uint32 + Column uint32 +} + type EncodedRecord struct { SymbolPkg uint16 SymbolName uint16 @@ -41,18 +49,43 @@ type EncodedRecord struct { Line uint32 } +type EncodedPCLineRecord struct { + ID uint64 + Func uint32 + File uint32 + Line uint32 +} + type Table struct { Records []EncodedRecord + PCLines []EncodedPCLineRecord StringOffsets []uint32 Strings []byte Hash []uint16 } func Encode(records []Record) (Table, error) { - if len(records) == 0 { + return EncodeWithPCLines(records, nil) +} + +func EncodeWithPCLines(records []Record, pcLines []PCLineRecord) (Table, error) { + funcIndex := make(map[string]uint32, len(records)) + for i, rec := range records { + if rec.Symbol != "" { + funcIndex[rec.Symbol] = uint32(i + 1) + } + } + filteredPCLines := make([]PCLineRecord, 0, len(pcLines)) + for _, rec := range pcLines { + if rec.ID == 0 || funcIndex[rec.Symbol] == 0 { + continue + } + filteredPCLines = append(filteredPCLines, rec) + } + if len(records) == 0 && len(filteredPCLines) 
== 0 { return Table{}, nil } - ids, offsets, strings, err := buildStringTable(collectStrings(records)) + ids, offsets, strings, err := buildStringTable(collectStrings(records, filteredPCLines)) if err != nil { return Table{}, err } @@ -75,6 +108,20 @@ func Encode(records []Record) (Table, error) { Line: rec.Line, }) } + out.PCLines = make([]EncodedPCLineRecord, 0, len(filteredPCLines)) + for _, rec := range filteredPCLines { + idx := funcIndex[rec.Symbol] + fileRoot, fileName := splitFileName(rec.File) + out.PCLines = append(out.PCLines, EncodedPCLineRecord{ + ID: rec.ID, + Func: idx, + File: packStringIDs(ids[fileRoot], ids[fileName]), + Line: rec.Line, + }) + } + sort.Slice(out.PCLines, func(i, j int) bool { + return out.PCLines[i].ID < out.PCLines[j].ID + }) out.Hash, err = buildHash(records) if err != nil { return Table{}, err @@ -82,13 +129,18 @@ func Encode(records []Record) (Table, error) { return out, nil } -func collectStrings(records []Record) []string { +func collectStrings(records []Record, pcLines []PCLineRecord) []string { seen := make(map[string]bool) for _, rec := range records { for _, s := range splitRecordStrings(rec) { seen[s] = true } } + for _, rec := range pcLines { + fileRoot, fileName := splitFileName(rec.File) + seen[fileRoot] = true + seen[fileName] = true + } delete(seen, "") out := make([]string, 0, len(seen)) for s := range seen { @@ -103,6 +155,10 @@ func collectStrings(records []Record) []string { return out } +func packStringIDs(hi, lo uint16) uint32 { + return uint32(hi)<<16 | uint32(lo) +} + func splitRecordStrings(rec Record) []string { symPkg, symName := splitQualifiedName(rec.Symbol) namePkg, nameName := splitQualifiedName(rec.Name) @@ -246,6 +302,10 @@ func (t Table) File(rec EncodedRecord) string { return t.String(rec.FileRoot) + t.String(rec.FileName) } +func (t Table) PCLineFile(rec EncodedPCLineRecord) string { + return t.String(uint16(rec.File>>16)) + t.String(uint16(rec.File)) +} + func (t Table) LookupSymbol(symbol 
string) (int, bool) { if len(t.Hash) == 0 { return 0, false @@ -267,7 +327,7 @@ func (t Table) LookupSymbol(symbol string) (int, bool) { } func (t Table) SizeBytes() int { - return len(t.Records)*16 + len(t.StringOffsets)*4 + len(t.Strings) + len(t.Hash)*2 + return len(t.Records)*16 + len(t.PCLines)*24 + len(t.StringOffsets)*4 + len(t.Strings) + len(t.Hash)*2 } func joinQualified(pkg, local string) string { diff --git a/internal/build/funcinfo/funcinfo_test.go b/internal/build/funcinfo/funcinfo_test.go index 238543d3e9..78a59fad2a 100644 --- a/internal/build/funcinfo/funcinfo_test.go +++ b/internal/build/funcinfo/funcinfo_test.go @@ -49,6 +49,36 @@ func TestEncodePoolsStringsAndBuildsHash(t *testing.T) { } } +func TestEncodeWithPCLines(t *testing.T) { + table, err := EncodeWithPCLines( + []Record{ + {Symbol: "example.com/p.f", Name: "example.com/p.F", File: "/src/p/f.go", Line: 10, Column: 1}, + {Symbol: "example.com/p.g", Name: "example.com/p.G", File: "/src/p/g.go", Line: 20, Column: 1}, + }, + []PCLineRecord{ + {ID: 3, Symbol: "missing", File: "missing.go", Line: 30}, + {ID: 2, Symbol: "example.com/p.g", File: "/src/p/call_g.go", Line: 22}, + {ID: 1, Symbol: "example.com/p.f", File: "/src/p/call_f.go", Line: 12}, + {ID: 0, Symbol: "example.com/p.f", File: "zero.go", Line: 1}, + }, + ) + if err != nil { + t.Fatal(err) + } + if len(table.PCLines) != 2 { + t.Fatalf("encoded pclines = %d, want 2", len(table.PCLines)) + } + if got := table.PCLines[0]; got.ID != 1 || got.Func != 1 || got.Line != 12 { + t.Fatalf("first pcline = %+v, want id 1 func 1 line 12", got) + } + if got := table.PCLines[1]; got.ID != 2 || got.Func != 2 || got.Line != 22 { + t.Fatalf("second pcline = %+v, want id 2 func 2 line 22", got) + } + if got := table.PCLineFile(table.PCLines[0]); got != "/src/p/call_f.go" { + t.Fatalf("pcline file = %q, want /src/p/call_f.go", got) + } +} + func TestEncodeRoundTripsSingleRecord(t *testing.T) { table, err := Encode([]Record{{Symbol: "s", Name: "n", File: 
"f", Line: 1, Column: 2}}) if err != nil { diff --git a/internal/build/funcinfo_table.go b/internal/build/funcinfo_table.go index 37402229dd..83b22d8bd1 100644 --- a/internal/build/funcinfo_table.go +++ b/internal/build/funcinfo_table.go @@ -32,7 +32,14 @@ const ( funcInfoStringOffsetsSymbol = "__llgo_funcinfo_string_offsets" funcInfoHashSymbol = "__llgo_funcinfo_hash" funcInfoHashMaskSymbol = "__llgo_funcinfo_hash_mask" + pcLineTableSymbol = "__llgo_pcline_table" + pcLineCountSymbol = "__llgo_pcline_count" + pcSiteStartPtrSymbol = "__llgo_pcsite_start" + pcSiteEndPtrSymbol = "__llgo_pcsite_end" + pcSiteStartSymbol = "__start_llgo_pcline" + pcSiteEndSymbol = "__stop_llgo_pcline" funcInfoDataSymbol = "__llgo_funcinfo_table$data" + pcLineDataSymbol = "__llgo_pcline_table$data" funcInfoStringsDataSymbol = "__llgo_funcinfo_strings$data" funcInfoStringOffsetsDataSymbol = "__llgo_funcinfo_string_offsets$data" funcInfoHashDataSymbol = "__llgo_funcinfo_hash$data" @@ -46,6 +53,14 @@ type funcInfoRecord struct { column uint32 } +type pcLineRecord struct { + id uint64 + symbol string + file string + line uint32 + column uint32 +} + func collectFuncInfo(pkgs []Package) []funcInfoRecord { seen := make(map[string]funcInfoRecord) for _, pkg := range pkgs { @@ -74,6 +89,36 @@ func collectFuncInfo(pkgs []Package) []funcInfoRecord { return out } +func collectPCLineInfo(pkgs []Package) []pcLineRecord { + var out []pcLineRecord + seen := make(map[uint64]none) + for _, pkg := range pkgs { + if pkg == nil || pkg.LPkg == nil { + continue + } + for _, rec := range readPCLineInfo(pkg.LPkg.Module()) { + if rec.id == 0 || rec.symbol == "" { + continue + } + if _, ok := seen[rec.id]; ok { + continue + } + seen[rec.id] = none{} + out = append(out, rec) + } + } + sort.Slice(out, func(i, j int) bool { + if out[i].symbol != out[j].symbol { + return out[i].symbol < out[j].symbol + } + if out[i].line != out[j].line { + return out[i].line < out[j].line + } + return out[i].id < out[j].id + }) + 
return out +} + func prepareFuncInfoTableRecords(records []funcInfoRecord, liveSymbols map[string]none) []funcInfoRecord { if len(records) == 0 { return nil @@ -123,12 +168,38 @@ func readFuncInfo(mod llvm.Module) []funcInfoRecord { return out } -func emitFuncInfoTable(ctx *context, pkg llssa.Package, records []funcInfoRecord) { +func readPCLineInfo(mod llvm.Module) []pcLineRecord { + rows := mod.NamedMetadataOperands(llssa.PCLineMetadataName) + if len(rows) == 0 { + return nil + } + out := make([]pcLineRecord, 0, len(rows)) + for _, row := range rows { + fields := row.MDNodeOperands() + if len(fields) != 6 || fields[0].ZExtValue() != 1 { + continue + } + if !fields[2].IsAMDString() || !fields[3].IsAMDString() { + continue + } + out = append(out, pcLineRecord{ + id: fields[1].ZExtValue(), + symbol: fields[2].MDString(), + file: fields[3].MDString(), + line: uint32(fields[4].ZExtValue()), + column: uint32(fields[5].ZExtValue()), + }) + } + return out +} + +func emitFuncInfoTable(ctx *context, pkg llssa.Package, records []funcInfoRecord, pcLines []pcLineRecord) { mod := pkg.Module() llvmCtx := mod.Context() i8Type := llvmCtx.Int8Type() i16Type := llvmCtx.Int16Type() i32Type := llvmCtx.Int32Type() + i64Type := llvmCtx.Int64Type() countType := llvmCtx.IntType(ctx.prog.PointerSize() * 8) recordType := llvmCtx.StructType([]llvm.Type{ i16Type, @@ -139,27 +210,58 @@ func emitFuncInfoTable(ctx *context, pkg llssa.Package, records []funcInfoRecord i16Type, i32Type, }, false) + pcLineRecordType := llvmCtx.StructType([]llvm.Type{ + i64Type, + i32Type, + i32Type, + i32Type, + }, false) + pcSiteRecordType := llvmCtx.StructType([]llvm.Type{ + llvm.PointerType(i8Type, 0), + i64Type, + }, false) tablePtr := llvm.AddGlobal(mod, llvm.PointerType(recordType, 0), funcInfoTableSymbol) + pcLinePtr := llvm.AddGlobal(mod, llvm.PointerType(pcLineRecordType, 0), pcLineTableSymbol) + pcSiteStartPtr := llvm.AddGlobal(mod, llvm.PointerType(pcSiteRecordType, 0), pcSiteStartPtrSymbol) + 
pcSiteEndPtr := llvm.AddGlobal(mod, llvm.PointerType(pcSiteRecordType, 0), pcSiteEndPtrSymbol) stringsPtr := llvm.AddGlobal(mod, llvm.PointerType(i8Type, 0), funcInfoStringsSymbol) stringOffsetsPtr := llvm.AddGlobal(mod, llvm.PointerType(i32Type, 0), funcInfoStringOffsetsSymbol) hashPtr := llvm.AddGlobal(mod, llvm.PointerType(i16Type, 0), funcInfoHashSymbol) count := llvm.AddGlobal(mod, countType, funcInfoCountSymbol) + pcLineCount := llvm.AddGlobal(mod, countType, pcLineCountSymbol) hashMask := llvm.AddGlobal(mod, countType, funcInfoHashMaskSymbol) - if len(records) == 0 { + if len(records) == 0 && len(pcLines) == 0 { tablePtr.SetInitializer(llvm.ConstPointerNull(tablePtr.GlobalValueType())) + pcLinePtr.SetInitializer(llvm.ConstPointerNull(pcLinePtr.GlobalValueType())) + pcSiteStartPtr.SetInitializer(llvm.ConstPointerNull(pcSiteStartPtr.GlobalValueType())) + pcSiteEndPtr.SetInitializer(llvm.ConstPointerNull(pcSiteEndPtr.GlobalValueType())) stringsPtr.SetInitializer(llvm.ConstPointerNull(stringsPtr.GlobalValueType())) stringOffsetsPtr.SetInitializer(llvm.ConstPointerNull(stringOffsetsPtr.GlobalValueType())) hashPtr.SetInitializer(llvm.ConstPointerNull(hashPtr.GlobalValueType())) count.SetInitializer(llvm.ConstInt(countType, 0, false)) + pcLineCount.SetInitializer(llvm.ConstInt(countType, 0, false)) hashMask.SetInitializer(llvm.ConstInt(countType, 0, false)) return } - encoded, err := buildfuncinfo.Encode(toFuncInfoRecords(records)) + encoded, err := buildfuncinfo.EncodeWithPCLines(toFuncInfoRecords(records), toPCLineRecords(pcLines)) if err != nil { panic(err) } + if len(encoded.Records) == 0 && len(encoded.PCLines) == 0 { + tablePtr.SetInitializer(llvm.ConstPointerNull(tablePtr.GlobalValueType())) + pcLinePtr.SetInitializer(llvm.ConstPointerNull(pcLinePtr.GlobalValueType())) + pcSiteStartPtr.SetInitializer(llvm.ConstPointerNull(pcSiteStartPtr.GlobalValueType())) + pcSiteEndPtr.SetInitializer(llvm.ConstPointerNull(pcSiteEndPtr.GlobalValueType())) + 
stringsPtr.SetInitializer(llvm.ConstPointerNull(stringsPtr.GlobalValueType())) + stringOffsetsPtr.SetInitializer(llvm.ConstPointerNull(stringOffsetsPtr.GlobalValueType())) + hashPtr.SetInitializer(llvm.ConstPointerNull(hashPtr.GlobalValueType())) + count.SetInitializer(llvm.ConstInt(countType, 0, false)) + pcLineCount.SetInitializer(llvm.ConstInt(countType, 0, false)) + hashMask.SetInitializer(llvm.ConstInt(countType, 0, false)) + return + } values := make([]llvm.Value, 0, len(encoded.Records)) for _, rec := range encoded.Records { @@ -181,6 +283,45 @@ func emitFuncInfoTable(ctx *context, pkg llssa.Package, records []funcInfoRecord data.SetUnnamedAddr(true) data.SetAlignment(4) + pcLineValues := make([]llvm.Value, 0, len(encoded.PCLines)) + for _, rec := range encoded.PCLines { + pcLineValues = append(pcLineValues, llvm.ConstNamedStruct(pcLineRecordType, []llvm.Value{ + llvm.ConstInt(i64Type, rec.ID, false), + llvm.ConstInt(i32Type, uint64(rec.Func), false), + llvm.ConstInt(i32Type, uint64(rec.File), false), + llvm.ConstInt(i32Type, uint64(rec.Line), false), + })) + } + if len(pcLineValues) == 0 { + pcLinePtr.SetInitializer(llvm.ConstPointerNull(pcLinePtr.GlobalValueType())) + pcLineCount.SetInitializer(llvm.ConstInt(countType, 0, false)) + pcSiteStartPtr.SetInitializer(llvm.ConstPointerNull(pcSiteStartPtr.GlobalValueType())) + pcSiteEndPtr.SetInitializer(llvm.ConstPointerNull(pcSiteEndPtr.GlobalValueType())) + } else { + pcLineArrayType := llvm.ArrayType(pcLineRecordType, len(pcLineValues)) + pcLineData := llvm.AddGlobal(mod, pcLineArrayType, pcLineDataSymbol) + pcLineData.SetInitializer(llvm.ConstArray(pcLineRecordType, pcLineValues)) + pcLineData.SetLinkage(llvm.PrivateLinkage) + pcLineData.SetGlobalConstant(true) + pcLineData.SetUnnamedAddr(true) + pcLineData.SetAlignment(8) + pcLinePtr.SetInitializer(llvm.ConstInBoundsGEP(pcLineArrayType, pcLineData, []llvm.Value{ + llvm.ConstInt(countType, 0, false), + llvm.ConstInt(countType, 0, false), + })) + 
pcLineCount.SetInitializer(llvm.ConstInt(countType, uint64(len(encoded.PCLines)), false)) + if ctx.buildConf.Goos == "linux" && ctx.buildConf.Target == "" { + emitPCSiteSentinel(mod, ctx.prog.PointerSize()) + pcSiteStart := llvm.AddGlobal(mod, pcSiteRecordType, pcSiteStartSymbol) + pcSiteEnd := llvm.AddGlobal(mod, pcSiteRecordType, pcSiteEndSymbol) + pcSiteStartPtr.SetInitializer(pcSiteStart) + pcSiteEndPtr.SetInitializer(pcSiteEnd) + } else { + pcSiteStartPtr.SetInitializer(llvm.ConstPointerNull(pcSiteStartPtr.GlobalValueType())) + pcSiteEndPtr.SetInitializer(llvm.ConstPointerNull(pcSiteEndPtr.GlobalValueType())) + } + } + stringArrayType := llvm.ArrayType(i8Type, len(encoded.Strings)) stringData := llvm.AddGlobal(mod, stringArrayType, funcInfoStringsDataSymbol) stringData.SetInitializer(llvmCtx.ConstString(string(encoded.Strings), false)) @@ -237,6 +378,21 @@ func emitFuncInfoTable(ctx *context, pkg llssa.Package, records []funcInfoRecord count.SetInitializer(llvm.ConstInt(countType, uint64(len(encoded.Records)), false)) } +func emitPCSiteSentinel(mod llvm.Module, pointerSize int) { + ptrDirective := ".quad" + align := "3" + if pointerSize == 4 { + ptrDirective = ".long" + align = "2" + } + mod.SetInlineAsm( + ".section llgo_pcline,\"aR\",@progbits\n" + + ".p2align " + align + "\n" + + ptrDirective + " 0\n" + + ".quad 0\n", + ) +} + func toFuncInfoRecords(records []funcInfoRecord) []buildfuncinfo.Record { out := make([]buildfuncinfo.Record, len(records)) for i, rec := range records { @@ -250,3 +406,17 @@ func toFuncInfoRecords(records []funcInfoRecord) []buildfuncinfo.Record { } return out } + +func toPCLineRecords(records []pcLineRecord) []buildfuncinfo.PCLineRecord { + out := make([]buildfuncinfo.PCLineRecord, len(records)) + for i, rec := range records { + out[i] = buildfuncinfo.PCLineRecord{ + ID: rec.id, + Symbol: rec.symbol, + File: rec.file, + Line: rec.line, + Column: rec.column, + } + } + return out +} diff --git a/internal/build/funcinfo_table_test.go 
b/internal/build/funcinfo_table_test.go index 649a20a8bc..6371a0ec4e 100644 --- a/internal/build/funcinfo_table_test.go +++ b/internal/build/funcinfo_table_test.go @@ -55,10 +55,14 @@ func TestFuncInfoTableMaterializesMetadataWithoutFunctionPointers(t *testing.T) ir := entry.LPkg.String() for _, want := range []string{ "@__llgo_funcinfo_table = global ptr", + "@__llgo_pcline_table = global ptr null", + "@__llgo_pcsite_start = global ptr null", + "@__llgo_pcsite_end = global ptr null", "@__llgo_funcinfo_strings = global ptr", "@__llgo_funcinfo_string_offsets = global ptr", "@__llgo_funcinfo_hash = global ptr", "@__llgo_funcinfo_count = global i64 1", + "@__llgo_pcline_count = global i64 0", "@__llgo_funcinfo_hash_mask = global i64 1", `@"__llgo_funcinfo_table$data" = private unnamed_addr constant [1 x { i16, i16, i16, i16, i16, i16, i32 }]`, `@"__llgo_funcinfo_string_offsets$data" = private unnamed_addr constant`, @@ -78,6 +82,58 @@ func TestFuncInfoTableMaterializesMetadataWithoutFunctionPointers(t *testing.T) } } +func TestFuncInfoTableMaterializesPCLineMetadata(t *testing.T) { + prog := llssa.NewProgram(nil) + src := prog.NewPackage("example.com/p", "example.com/p") + src.EmitFuncInfo("example.com/p.live", "example.com/p.Live", "live.go", 17, 3) + src.EmitPCLineInfo(0x1234, "example.com/p.live", "call.go", 23, 5) + src.EmitPCLineInfo(0x5678, "example.com/p.missing", "missing.go", 99, 1) + + records := collectFuncInfo([]Package{{LPkg: src}}) + pcLines := collectPCLineInfo([]Package{{LPkg: src}}) + if len(records) != 1 { + t.Fatalf("collectFuncInfo returned %d records, want 1", len(records)) + } + if len(pcLines) != 2 { + t.Fatalf("collectPCLineInfo returned %d records, want 2", len(pcLines)) + } + + ctx := &context{ + prog: prog, + buildConf: &Config{ + BuildMode: BuildModeExe, + Goos: "linux", + Goarch: "amd64", + }, + } + entry := genMainModule(ctx, llssa.PkgRuntime, &packages.Package{ + PkgPath: "example.com/main", + ExportFile: "main.a", + }, 
&genConfig{funcInfo: records, pcLineInfo: pcLines}) + ir := entry.LPkg.String() + for _, want := range []string{ + "@__llgo_pcline_table = global ptr", + "@__llgo_pcsite_start = global ptr @__start_llgo_pcline", + "@__llgo_pcsite_end = global ptr @__stop_llgo_pcline", + "@__llgo_pcline_count = global i64 1", + "module asm \".section llgo_pcline", + `@"__llgo_pcline_table$data" = private unnamed_addr constant [1 x { i64, i32, i32, i32 }]`, + "i64 4660", + "i32 23", + `call.go\00`, + } { + if !strings.Contains(ir, want) { + t.Fatalf("pcline table IR missing %q:\n%s", want, ir) + } + } + if strings.Contains(ir, "missing.go") || strings.Contains(ir, "i64 22136") { + t.Fatalf("pcline table should drop records without matching function metadata:\n%s", ir) + } + if strings.Contains(ir, `ptr @"example.com/p.live"`) { + t.Fatalf("pcline table must not reference function pointers:\n%s", ir) + } +} + func TestPrepareFuncInfoTableRecordsFiltersLiveSymbols(t *testing.T) { records := []funcInfoRecord{ {symbol: "dead", name: "dead"}, @@ -138,10 +194,14 @@ func TestFuncInfoTableEmptyDefinitions(t *testing.T) { ir := entry.LPkg.String() for _, want := range []string{ "@__llgo_funcinfo_table = global ptr null", + "@__llgo_pcline_table = global ptr null", + "@__llgo_pcsite_start = global ptr null", + "@__llgo_pcsite_end = global ptr null", "@__llgo_funcinfo_strings = global ptr null", "@__llgo_funcinfo_string_offsets = global ptr null", "@__llgo_funcinfo_hash = global ptr null", "@__llgo_funcinfo_count = global i64 0", + "@__llgo_pcline_count = global i64 0", "@__llgo_funcinfo_hash_mask = global i64 0", } { if !strings.Contains(ir, want) { diff --git a/internal/build/main_module.go b/internal/build/main_module.go index 9f68a976ac..83289dd23f 100644 --- a/internal/build/main_module.go +++ b/internal/build/main_module.go @@ -44,6 +44,7 @@ type genConfig struct { methodByName map[string]none abiSymbols map[string]none funcInfo []funcInfoRecord + pcLineInfo []pcLineRecord } // 
genMainModule generates the main entry module for an llgo program. @@ -61,7 +62,7 @@ func genMainModule(ctx *context, rtPkgPath string, pkg *packages.Package, cfg *g argvValueType := prog.Pointer(prog.CStr()) argvVar := mainPkg.NewVarEx("__llgo_argv", prog.Pointer(argvValueType)) argvVar.InitNil() - emitFuncInfoTable(ctx, mainPkg, cfg.funcInfo) + emitFuncInfoTable(ctx, mainPkg, cfg.funcInfo, cfg.pcLineInfo) exportFile := pkg.ExportFile if exportFile == "" { diff --git a/runtime/internal/clite/debug/_wrap/debug.c b/runtime/internal/clite/debug/_wrap/debug.c index cf050c8848..a03fb3ca1c 100644 --- a/runtime/internal/clite/debug/_wrap/debug.c +++ b/runtime/internal/clite/debug/_wrap/debug.c @@ -21,6 +21,13 @@ int llgo_addrinfo(void *addr, Dl_info *info) { return ret; } +void *llgo_symbol(char *name) { + int saved_errno = errno; + void *ret = dlsym(RTLD_DEFAULT, name); + errno = saved_errno; + return ret; +} + void llgo_stacktrace(int skip, void *ctx, int (*fn)(void *ctx, void *pc, void *offset, void *sp, char *name)) { int saved_errno = errno; unw_cursor_t cursor; diff --git a/runtime/internal/clite/debug/debug.go b/runtime/internal/clite/debug/debug.go index d35899cd99..d58a5f1941 100644 --- a/runtime/internal/clite/debug/debug.go +++ b/runtime/internal/clite/debug/debug.go @@ -25,6 +25,9 @@ func Address() unsafe.Pointer //go:linkname Addrinfo C.llgo_addrinfo func Addrinfo(addr unsafe.Pointer, info *Info) c.Int +//go:linkname Symbol C.llgo_symbol +func Symbol(name *c.Char) unsafe.Pointer + //go:linkname stacktrace C.llgo_stacktrace func stacktrace(skip c.Int, ctx unsafe.Pointer, fn func(ctx, pc, offset, sp unsafe.Pointer, name *c.Char) c.Int) diff --git a/runtime/internal/lib/runtime/symtab.go b/runtime/internal/lib/runtime/symtab.go index 919cd74ed3..543d395cd7 100644 --- a/runtime/internal/lib/runtime/symtab.go +++ b/runtime/internal/lib/runtime/symtab.go @@ -127,7 +127,7 @@ const frameSymbolCacheSize = 128 var frameSymbolCache 
[frameSymbolCacheSize]frameSymbolCacheEntry func recordFrameSymbol(pc, offset uintptr, name string) { - if pc == 0 || name == "" { + if pc == 0 || name == "" || isPCSiteSymbol(name) { return } i := (pc >> 4) & (frameSymbolCacheSize - 1) @@ -162,6 +162,42 @@ var runtimeFuncInfoCount uintptr //go:linkname runtimeFuncInfoHashMask __llgo_funcinfo_hash_mask var runtimeFuncInfoHashMask uintptr +type runtimePCLineRecord struct { + id uint64 + funcIndex uint32 + file uint32 + line uint32 +} + +//go:linkname runtimePCLineTable __llgo_pcline_table +var runtimePCLineTable *runtimePCLineRecord + +//go:linkname runtimePCLineCount __llgo_pcline_count +var runtimePCLineCount uintptr + +type runtimePCSiteRecord struct { + pc uintptr + id uint64 +} + +//go:linkname runtimePCSiteStart __llgo_pcsite_start +var runtimePCSiteStart *runtimePCSiteRecord + +//go:linkname runtimePCSiteEnd __llgo_pcsite_end +var runtimePCSiteEnd *runtimePCSiteRecord + +type runtimePCLineFrame struct { + pc uintptr + entry uintptr + function string + file string + line int + startLine int +} + +var runtimePCLineInit bool +var runtimePCLineFrames []runtimePCLineFrame + func hasStringPrefix(s, prefix string) bool { if len(s) < len(prefix) { return false @@ -174,6 +210,15 @@ func hasStringPrefix(s, prefix string) bool { return true } +func isPCSiteSymbol(name string) bool { + for i := 0; i < len(name) && name[i] == '_'; i++ { + if hasStringPrefix(name[i:], "__llgo_pcsite_") { + return true + } + } + return false +} + func publicFunctionName(name string) string { const commandLineArguments = "command-line-arguments." 
if hasStringPrefix(name, commandLineArguments) { @@ -252,6 +297,11 @@ func funcInfoAt(i uintptr) *runtimeFuncInfoRecord { return (*runtimeFuncInfoRecord)(unsafe.Add(unsafe.Pointer(runtimeFuncInfoTable), i*size)) } +func pcLineAt(i uintptr) *runtimePCLineRecord { + size := unsafe.Sizeof(*runtimePCLineTable) + return (*runtimePCLineRecord)(unsafe.Add(unsafe.Pointer(runtimePCLineTable), i*size)) +} + func funcInfoHashString(s string) uintptr { const ( offset = uint32(2166136261) @@ -317,10 +367,17 @@ func funcInfoJoinFile(rootID, nameID uint16) string { return string(buf) } +func funcInfoPackedFile(file uint32) string { + return funcInfoJoinFile(uint16(file>>16), uint16(file)) +} + func funcInfoForSymbol(symbol string) *runtimeFuncInfoRecord { if symbol == "" || runtimeFuncInfoTable == nil || runtimeFuncInfoCount == 0 { return nil } + if runtimeFuncInfoStrings == nil || runtimeFuncInfoStringOffsets == nil || runtimeFuncInfoCount > 1<<20 || runtimeFuncInfoHashMask > 1<<22 { + return nil + } if runtimeFuncInfoHash != nil && runtimeFuncInfoHashMask != 0 { slot := funcInfoHashString(symbol) & runtimeFuncInfoHashMask for probes := uintptr(0); probes <= runtimeFuncInfoHashMask; probes++ { @@ -382,6 +439,9 @@ func cachedFrameSymbol(pc uintptr) pcSymbol { return pcSymbol{pc: pc} } rawFn := entry.name + if isPCSiteSymbol(rawFn) { + return pcSymbol{pc: pc} + } fn := publicFunctionName(rawFn) sym := pcSymbol{ pc: pc, @@ -399,6 +459,9 @@ func addrInfoSymbol(pc uintptr) pcSymbol { return cachedFrameSymbol(pc) } rawFn := safeGoString(info.Sname, "") + if isPCSiteSymbol(rawFn) { + return pcSymbol{pc: pc} + } if rawFn == "" { if sym := cachedFrameSymbol(pc); sym.ok { return sym @@ -415,28 +478,284 @@ func addrInfoSymbol(pc uintptr) pcSymbol { return sym } -func frameSymbol(pc uintptr) pcSymbol { - if frame, ok := rtdebug.FrameForPC(pc); ok { - return pcSymbol{ +func initRuntimePCLineFrames() { + if runtimePCLineInit { + return + } + runtimePCLineInit = true + if runtimePCLineTable == 
nil || + runtimePCLineCount == 0 || + runtimePCSiteStart == nil || + runtimePCSiteEnd == nil || + runtimeFuncInfoTable == nil || + runtimeFuncInfoCount == 0 || + runtimeFuncInfoStrings == nil || + runtimeFuncInfoStringOffsets == nil { + return + } + if runtimePCLineCount > 1<<20 || runtimePCLineCount > runtimeFuncInfoCount*1024 { + return + } + start := uintptr(unsafe.Pointer(runtimePCSiteStart)) + end := uintptr(unsafe.Pointer(runtimePCSiteEnd)) + size := unsafe.Sizeof(*runtimePCSiteStart) + if end <= start || size == 0 || (end-start)%size != 0 { + return + } + nsite := (end - start) / size + if nsite > runtimePCLineCount*1024 || nsite > 1<<22 { + return + } + frames := make([]runtimePCLineFrame, 0, nsite) + for i := uintptr(0); i < nsite; i++ { + site := (*runtimePCSiteRecord)(unsafe.Pointer(start + i*size)) + if site == nil || site.id == 0 || site.pc == 0 { + continue + } + rec := pcLineInfoForID(site.id) + if rec == nil || rec.funcIndex == 0 || uintptr(rec.funcIndex) > runtimeFuncInfoCount { + continue + } + pc := site.pc + fn := funcInfoAt(uintptr(rec.funcIndex) - 1) + entry := symbolPC(funcInfoJoinName(fn.symbolPkg, fn.symbolName)) + if entry == 0 { + sym := addrInfoSymbol(pc) + entry = sym.entry + } + file := funcInfoPackedFile(rec.file) + if file == "" { + file = funcInfoJoinFile(fn.fileRoot, fn.fileName) + } + line := int(rec.line) + if line == 0 { + line = int(fn.line) + } + function := publicFunctionName(funcInfoJoinName(fn.namePkg, fn.nameName)) + if function == "" { + function = publicFunctionName(funcInfoJoinName(fn.symbolPkg, fn.symbolName)) + } + frames = append(frames, runtimePCLineFrame{ pc: pc, - entry: frame.Entry, - function: frame.Function, - file: frame.File, - line: frame.Line, - startLine: frame.StartLine, - ok: true, + entry: entry, + function: function, + file: file, + line: line, + startLine: int(fn.line), + }) + } + sortRuntimePCLineFrames(frames) + runtimePCLineFrames = uniqueRuntimePCLineFrames(frames) +} + +func pcLineInfoForID(id 
uint64) *runtimePCLineRecord { + lo, hi := uintptr(0), runtimePCLineCount + for lo < hi { + mid := (lo + hi) >> 1 + rec := pcLineAt(mid) + if rec.id >= id { + hi = mid + } else { + lo = mid + 1 + } + } + if lo >= runtimePCLineCount { + return nil + } + rec := pcLineAt(lo) + if rec.id != id { + return nil + } + return rec +} + +func symbolPC(symbol string) uintptr { + if symbol == "" { + return 0 + } + buf := make([]byte, len(symbol)+1) + copy(buf, symbol) + return uintptr(clitedebug.Symbol((*c.Char)(unsafe.Pointer(&buf[0])))) +} + +func sortRuntimePCLineFrames(frames []runtimePCLineFrame) { + if len(frames) < 2 { + return + } + quickSortRuntimePCLineFrames(frames, 0, len(frames)-1) +} + +func quickSortRuntimePCLineFrames(frames []runtimePCLineFrame, lo, hi int) { + for hi-lo > 16 { + mid := int(uint(lo+hi) >> 1) + if frames[mid].pc < frames[lo].pc { + frames[mid], frames[lo] = frames[lo], frames[mid] + } + if frames[hi].pc < frames[mid].pc { + frames[hi], frames[mid] = frames[mid], frames[hi] + } + if frames[mid].pc < frames[lo].pc { + frames[mid], frames[lo] = frames[lo], frames[mid] + } + pivot := frames[mid].pc + i, j := lo, hi + for { + for frames[i].pc < pivot { + i++ + } + for frames[j].pc > pivot { + j-- + } + if i >= j { + break + } + frames[i], frames[j] = frames[j], frames[i] + i++ + j-- + } + if j-lo < hi-i { + quickSortRuntimePCLineFrames(frames, lo, j) + lo = i + } else { + quickSortRuntimePCLineFrames(frames, i, hi) + hi = j + } + } + for i := lo + 1; i <= hi; i++ { + x := frames[i] + j := i - 1 + for j >= lo && frames[j].pc > x.pc { + frames[j+1] = frames[j] + j-- + } + frames[j+1] = x + } +} + +func uniqueRuntimePCLineFrames(frames []runtimePCLineFrame) []runtimePCLineFrame { + if len(frames) < 2 { + return frames + } + out := frames[:1] + for i := 1; i < len(frames); i++ { + if frames[i].pc == out[len(out)-1].pc { + out[len(out)-1] = frames[i] + continue + } + out = append(out, frames[i]) + } + return out +} + +func pcLineFrameForPC(pc, entry 
uintptr) (pcSymbol, bool) { + if pc == 0 { + return pcSymbol{}, false + } + initRuntimePCLineFrames() + frames := runtimePCLineFrames + if len(frames) == 0 { + return pcSymbol{}, false + } + lo, hi := 0, len(frames) + for lo < hi { + mid := int(uint(lo+hi) >> 1) + if frames[mid].pc > pc { + hi = mid + } else { + lo = mid + 1 + } + } + if lo == 0 { + return pcSymbol{}, false + } + frame := frames[lo-1] + if entry != 0 && frame.entry != 0 && frame.entry != entry { + return pcSymbol{}, false + } + return pcSymbol{ + pc: pc, + entry: frame.entry, + function: frame.function, + file: frame.file, + line: frame.line, + startLine: frame.startLine, + ok: true, + }, true +} + +func mergePCLineSymbol(base, line pcSymbol) pcSymbol { + if line.entry == 0 { + line.entry = base.entry + } + if line.function == "" { + line.function = base.function + } + if line.file == "" { + line.file = base.file + } + if line.line == 0 { + line.line = base.line + } + if line.startLine == 0 { + line.startLine = base.startLine + } + line.ok = true + return line +} + +func frameSymbol(pc uintptr) pcSymbol { + if pc&3 != 0 { + if frame, ok := rtdebug.FrameForPC(pc); ok { + return pcSymbol{ + pc: pc, + entry: frame.Entry, + function: frame.Function, + file: frame.File, + line: frame.Line, + startLine: frame.StartLine, + ok: true, + } } } sym := addrInfoSymbol(pc) if pc == 0 { + if frame, ok := rtdebug.FrameForPC(pc); ok { + return pcSymbol{ + pc: pc, + entry: frame.Entry, + function: frame.Function, + file: frame.File, + line: frame.Line, + startLine: frame.StartLine, + ok: true, + } + } return sym } + if lineSym, ok := pcLineFrameForPC(pc, sym.entry); ok { + return mergePCLineSymbol(sym, lineSym) + } if sym.entry == 0 || pc > sym.entry { if callSym := addrInfoSymbol(pc - 1); callSym.ok { + if lineSym, ok := pcLineFrameForPC(pc-1, callSym.entry); ok { + lineSym.pc = pc + return mergePCLineSymbol(callSym, lineSym) + } callSym.pc = pc return callSym } } + if frame, ok := rtdebug.FrameForPC(pc); ok { + 
return pcSymbol{ + pc: pc, + entry: frame.Entry, + function: frame.Function, + file: frame.File, + line: frame.Line, + startLine: frame.StartLine, + ok: true, + } + } return sym } diff --git a/ssa/funcinfo.go b/ssa/funcinfo.go index 734399093d..4dbc8f08e1 100644 --- a/ssa/funcinfo.go +++ b/ssa/funcinfo.go @@ -20,7 +20,9 @@ import "github.com/xgo-dev/llvm" const ( FuncInfoMetadataName = "llgo.funcinfo" + PCLineMetadataName = "llgo.pcline" funcInfoVersion = 1 + pcLineVersion = 1 ) // EnableFuncInfoMetadata controls emission of DCE-safe function source @@ -61,3 +63,31 @@ func (p Package) EmitFuncInfo(symbol, name, file string, line, column int) { }), ) } + +// EmitPCLineInfo records a PC label id and its source position. The id names a +// zero-byte label emitted in the function body; keeping the metadata string-only +// lets dead functions be removed without the line table holding address +// references to them. +func (p Package) EmitPCLineInfo(id uint64, symbol, file string, line, column int) { + if id == 0 || symbol == "" { + return + } + if line < 0 { + line = 0 + } + if column < 0 { + column = 0 + } + i32 := p.Prog.Int32().ll + i64 := p.Prog.Int64().ll + p.mod.AddNamedMetadataOperand(PCLineMetadataName, + p.Prog.ctx.MDNode([]llvm.Metadata{ + llvm.ConstInt(i32, pcLineVersion, false).ConstantAsMetadata(), + llvm.ConstInt(i64, id, false).ConstantAsMetadata(), + p.Prog.ctx.MDString(symbol), + p.Prog.ctx.MDString(file), + llvm.ConstInt(i32, uint64(line), false).ConstantAsMetadata(), + llvm.ConstInt(i32, uint64(column), false).ConstantAsMetadata(), + }), + ) +} From b29906c8c6e724f2d147b4ddce59c2d3e06f23f6 Mon Sep 17 00:00:00 2001 From: Li Jie Date: Tue, 30 Jun 2026 22:46:23 +0800 Subject: [PATCH 09/59] runtime: optimize FuncForPC metadata lookup --- internal/build/funcinfo_table.go | 31 +- .../lib/runtime/pprof_runtime_stub_llgo.go | 37 +-- runtime/internal/lib/runtime/symtab.go | 284 +++++++++++++++++- 3 files changed, 318 insertions(+), 34 deletions(-) diff --git 
a/internal/build/funcinfo_table.go b/internal/build/funcinfo_table.go index 83b22d8bd1..680fe604e3 100644 --- a/internal/build/funcinfo_table.go +++ b/internal/build/funcinfo_table.go @@ -18,6 +18,7 @@ package build import ( "sort" + "strings" "github.com/xgo-dev/llvm" @@ -310,8 +311,7 @@ func emitFuncInfoTable(ctx *context, pkg llssa.Package, records []funcInfoRecord llvm.ConstInt(countType, 0, false), })) pcLineCount.SetInitializer(llvm.ConstInt(countType, uint64(len(encoded.PCLines)), false)) - if ctx.buildConf.Goos == "linux" && ctx.buildConf.Target == "" { - emitPCSiteSentinel(mod, ctx.prog.PointerSize()) + if shouldEmitRuntimeELFSites(ctx) { pcSiteStart := llvm.AddGlobal(mod, pcSiteRecordType, pcSiteStartSymbol) pcSiteEnd := llvm.AddGlobal(mod, pcSiteRecordType, pcSiteEndSymbol) pcSiteStartPtr.SetInitializer(pcSiteStart) @@ -321,6 +321,7 @@ func emitFuncInfoTable(ctx *context, pkg llssa.Package, records []funcInfoRecord pcSiteEndPtr.SetInitializer(llvm.ConstPointerNull(pcSiteEndPtr.GlobalValueType())) } } + emitRuntimeFuncInfoSentinels(mod, ctx.prog.PointerSize(), shouldEmitRuntimeELFSites(ctx) && len(pcLineValues) != 0) stringArrayType := llvm.ArrayType(i8Type, len(encoded.Strings)) stringData := llvm.AddGlobal(mod, stringArrayType, funcInfoStringsDataSymbol) @@ -378,19 +379,31 @@ func emitFuncInfoTable(ctx *context, pkg llssa.Package, records []funcInfoRecord count.SetInitializer(llvm.ConstInt(countType, uint64(len(encoded.Records)), false)) } -func emitPCSiteSentinel(mod llvm.Module, pointerSize int) { +func shouldEmitRuntimeELFSites(ctx *context) bool { + return ctx != nil && + ctx.buildConf != nil && + ctx.buildConf.Goos == "linux" && + ctx.buildConf.Target == "" +} + +func emitRuntimeFuncInfoSentinels(mod llvm.Module, pointerSize int, pcSite bool) { + if !pcSite { + return + } ptrDirective := ".quad" align := "3" if pointerSize == 4 { ptrDirective = ".long" align = "2" } - mod.SetInlineAsm( - ".section llgo_pcline,\"aR\",@progbits\n" + - ".p2align " + 
align + "\n" + - ptrDirective + " 0\n" + - ".quad 0\n", - ) + var asm strings.Builder + if pcSite { + asm.WriteString(".section llgo_pcline,\"aR\",@progbits\n") + asm.WriteString(".p2align " + align + "\n") + asm.WriteString(ptrDirective + " 0\n") + asm.WriteString(".quad 0\n") + } + mod.SetInlineAsm(asm.String()) } func toFuncInfoRecords(records []funcInfoRecord) []buildfuncinfo.Record { diff --git a/runtime/internal/lib/runtime/pprof_runtime_stub_llgo.go b/runtime/internal/lib/runtime/pprof_runtime_stub_llgo.go index 8fac1ada4b..aa69889194 100644 --- a/runtime/internal/lib/runtime/pprof_runtime_stub_llgo.go +++ b/runtime/internal/lib/runtime/pprof_runtime_stub_llgo.go @@ -3,7 +3,6 @@ package runtime import ( - "github.com/goplus/llgo/runtime/internal/clite/tls" llrt "github.com/goplus/llgo/runtime/internal/runtime" ) @@ -87,23 +86,24 @@ func NumGoroutine() int { func SetCPUProfileRate(hz int) {} -const funcForPCCacheSize = 256 +const funcForPCCacheSize = 1024 type funcForPCCacheEntry struct { pc uintptr fn *Func } -type funcForPCCache struct { - entries [funcForPCCacheSize]funcForPCCacheEntry -} - -var funcForPCCacheTLS = tls.Alloc[*funcForPCCache](nil) +var funcForPCCache [funcForPCCacheSize]funcForPCCacheEntry func FuncForPC(pc uintptr) *Func { if fn := cachedFuncForPC(pc); fn != nil { return fn } + if sym, ok := funcPCFrameForPC(pc); ok { + fn := newFuncForPC(pc, sym) + cacheFuncForPC(pc, fn) + return fn + } sym := frameSymbol(pc) fn := newFuncForPC(pc, sym) cacheFuncForPC(pc, fn) @@ -132,27 +132,18 @@ func newFuncForPC(pc uintptr, sym pcSymbol) *Func { } func cachedFuncForPC(pc uintptr) *Func { - cache := funcForPCCacheTLS.Get() - if cache == nil { - return nil - } - entry := &cache.entries[funcForPCCacheIndex(pc)] - if entry.pc == pc && entry.fn != nil { - return entry.fn + entry := &funcForPCCache[funcForPCCacheIndex(pc)] + fn := entry.fn + if fn != nil && entry.pc == pc && fn.pc == pc { + return fn } return nil } func cacheFuncForPC(pc uintptr, fn *Func) { 
- cache := funcForPCCacheTLS.Get() - if cache == nil { - cache = new(funcForPCCache) - funcForPCCacheTLS.Set(cache) - } - cache.entries[funcForPCCacheIndex(pc)] = funcForPCCacheEntry{ - pc: pc, - fn: fn, - } + entry := &funcForPCCache[funcForPCCacheIndex(pc)] + entry.fn = fn + entry.pc = pc } func funcForPCCacheIndex(pc uintptr) uintptr { diff --git a/runtime/internal/lib/runtime/symtab.go b/runtime/internal/lib/runtime/symtab.go index 543d395cd7..ce9ef6dc7f 100644 --- a/runtime/internal/lib/runtime/symtab.go +++ b/runtime/internal/lib/runtime/symtab.go @@ -198,6 +198,26 @@ type runtimePCLineFrame struct { var runtimePCLineInit bool var runtimePCLineFrames []runtimePCLineFrame +type runtimeFuncPCFrame struct { + entry uintptr + funcIndex uint32 + function string + file string + startLine int +} + +type runtimePCPageIndex struct { + base uintptr + pages []uint32 +} + +const runtimeFuncPCPageShift = 12 + +var runtimeFuncPCInit bool +var runtimeFuncPCFrames []runtimeFuncPCFrame +var runtimeFuncPCEntries []uintptr +var runtimeFuncPCIndex runtimePCPageIndex + func hasStringPrefix(s, prefix string) bool { if len(s) < len(prefix) { return false @@ -478,6 +498,217 @@ func addrInfoSymbol(pc uintptr) pcSymbol { return sym } +func initRuntimeFuncPCFrames() { + if runtimeFuncPCInit { + return + } + runtimeFuncPCInit = true + if runtimeFuncInfoTable == nil || + runtimeFuncInfoCount == 0 || + runtimeFuncInfoStrings == nil || + runtimeFuncInfoStringOffsets == nil { + return + } + if runtimeFuncInfoCount > 1<<20 { + return + } + frames := make([]runtimeFuncPCFrame, 0, runtimeFuncInfoCount) + entries := make([]uintptr, runtimeFuncInfoCount+1) + for i := uintptr(0); i < runtimeFuncInfoCount; i++ { + fn := funcInfoAt(i) + pc := symbolPC(funcInfoJoinName(fn.symbolPkg, fn.symbolName)) + if pc == 0 { + continue + } + index := uint32(i + 1) + function := publicFunctionName(funcInfoJoinName(fn.namePkg, fn.nameName)) + if function == "" { + function = 
publicFunctionName(funcInfoJoinName(fn.symbolPkg, fn.symbolName)) + } + file := funcInfoJoinFile(fn.fileRoot, fn.fileName) + frames = append(frames, runtimeFuncPCFrame{ + entry: pc, + funcIndex: index, + function: function, + file: file, + startLine: int(fn.line), + }) + if entries[index] == 0 || pc < entries[index] { + entries[index] = pc + } + } + sortRuntimeFuncPCFrames(frames) + frames = uniqueRuntimeFuncPCFrames(frames) + runtimeFuncPCFrames = frames + runtimeFuncPCEntries = entries + runtimeFuncPCIndex = buildRuntimeFuncPCIndex(frames) +} + +func sortRuntimeFuncPCFrames(frames []runtimeFuncPCFrame) { + if len(frames) < 2 { + return + } + quickSortRuntimeFuncPCFrames(frames, 0, len(frames)-1) +} + +func quickSortRuntimeFuncPCFrames(frames []runtimeFuncPCFrame, lo, hi int) { + for hi-lo > 16 { + mid := int(uint(lo+hi) >> 1) + if frames[mid].entry < frames[lo].entry { + frames[mid], frames[lo] = frames[lo], frames[mid] + } + if frames[hi].entry < frames[mid].entry { + frames[hi], frames[mid] = frames[mid], frames[hi] + } + if frames[mid].entry < frames[lo].entry { + frames[mid], frames[lo] = frames[lo], frames[mid] + } + pivot := frames[mid].entry + i, j := lo, hi + for { + for frames[i].entry < pivot { + i++ + } + for frames[j].entry > pivot { + j-- + } + if i >= j { + break + } + frames[i], frames[j] = frames[j], frames[i] + i++ + j-- + } + if j-lo < hi-i { + quickSortRuntimeFuncPCFrames(frames, lo, j) + lo = i + } else { + quickSortRuntimeFuncPCFrames(frames, i, hi) + hi = j + } + } + for i := lo + 1; i <= hi; i++ { + x := frames[i] + j := i - 1 + for j >= lo && frames[j].entry > x.entry { + frames[j+1] = frames[j] + j-- + } + frames[j+1] = x + } +} + +func uniqueRuntimeFuncPCFrames(frames []runtimeFuncPCFrame) []runtimeFuncPCFrame { + if len(frames) < 2 { + return frames + } + out := frames[:1] + for i := 1; i < len(frames); i++ { + if frames[i].entry == out[len(out)-1].entry { + out[len(out)-1] = frames[i] + continue + } + out = append(out, frames[i]) + } + 
return out +} + +func buildRuntimeFuncPCIndex(frames []runtimeFuncPCFrame) runtimePCPageIndex { + if len(frames) == 0 { + return runtimePCPageIndex{} + } + base := frames[0].entry >> runtimeFuncPCPageShift + last := frames[len(frames)-1].entry >> runtimeFuncPCPageShift + if last < base { + return runtimePCPageIndex{} + } + npages := last - base + 2 + if npages > 1<<20 && npages > uintptr(len(frames))*64 { + return runtimePCPageIndex{} + } + pages := make([]uint32, npages) + next := 0 + for page := range pages { + limit := (base + uintptr(page)) << runtimeFuncPCPageShift + for next < len(frames) && frames[next].entry < limit { + next++ + } + pages[page] = uint32(next) + } + return runtimePCPageIndex{base: base, pages: pages} +} + +func runtimeFuncPCFrameIndex(pc uintptr) int { + frames := runtimeFuncPCFrames + if len(frames) == 0 { + return -1 + } + lo, hi := 0, len(frames) + if pages := runtimeFuncPCIndex.pages; len(pages) != 0 { + page := pc >> runtimeFuncPCPageShift + if page >= runtimeFuncPCIndex.base { + off := page - runtimeFuncPCIndex.base + if off < uintptr(len(pages)) { + lo = int(pages[off]) + if off+1 < uintptr(len(pages)) { + hi = int(pages[off+1]) + } + if lo > 0 { + lo-- + } + if hi < len(frames) { + hi++ + } + } + } + } + for lo < hi { + mid := int(uint(lo+hi) >> 1) + if frames[mid].entry > pc { + hi = mid + } else { + lo = mid + 1 + } + } + idx := lo - 1 + if idx < 0 { + return -1 + } + return idx +} + +func funcEntryForIndex(index uint32) uintptr { + if index == 0 { + return 0 + } + initRuntimeFuncPCFrames() + if uintptr(index) >= uintptr(len(runtimeFuncPCEntries)) { + return 0 + } + return runtimeFuncPCEntries[index] +} + +func funcPCFrameForPC(pc uintptr) (pcSymbol, bool) { + if pc == 0 { + return pcSymbol{}, false + } + initRuntimeFuncPCFrames() + idx := runtimeFuncPCFrameIndex(pc) + if idx < 0 { + return pcSymbol{}, false + } + frame := runtimeFuncPCFrames[idx] + return pcSymbol{ + pc: pc, + entry: frame.entry, + function: frame.function, + 
file: frame.file, + line: frame.startLine, + startLine: frame.startLine, + ok: true, + }, true +} + func initRuntimePCLineFrames() { if runtimePCLineInit { return @@ -518,7 +749,10 @@ func initRuntimePCLineFrames() { } pc := site.pc fn := funcInfoAt(uintptr(rec.funcIndex) - 1) - entry := symbolPC(funcInfoJoinName(fn.symbolPkg, fn.symbolName)) + entry := funcEntryForIndex(rec.funcIndex) + if entry == 0 { + entry = symbolPC(funcInfoJoinName(fn.symbolPkg, fn.symbolName)) + } if entry == 0 { sym := addrInfoSymbol(pc) entry = sym.entry @@ -683,6 +917,39 @@ func pcLineFrameForPC(pc, entry uintptr) (pcSymbol, bool) { }, true } +func pcLineFrameForExactPC(pc uintptr) (pcSymbol, bool) { + if pc == 0 { + return pcSymbol{}, false + } + initRuntimePCLineFrames() + frames := runtimePCLineFrames + if len(frames) == 0 { + return pcSymbol{}, false + } + lo, hi := 0, len(frames) + for lo < hi { + mid := int(uint(lo+hi) >> 1) + if frames[mid].pc >= pc { + hi = mid + } else { + lo = mid + 1 + } + } + if lo >= len(frames) || frames[lo].pc != pc { + return pcSymbol{}, false + } + frame := frames[lo] + return pcSymbol{ + pc: pc, + entry: frame.entry, + function: frame.function, + file: frame.file, + line: frame.line, + startLine: frame.startLine, + ok: true, + }, true +} + func mergePCLineSymbol(base, line pcSymbol) pcSymbol { if line.entry == 0 { line.entry = base.entry @@ -717,8 +984,8 @@ func frameSymbol(pc uintptr) pcSymbol { } } } - sym := addrInfoSymbol(pc) if pc == 0 { + sym := addrInfoSymbol(pc) if frame, ok := rtdebug.FrameForPC(pc); ok { return pcSymbol{ pc: pc, @@ -732,6 +999,14 @@ func frameSymbol(pc uintptr) pcSymbol { } return sym } + if lineSym, ok := pcLineFrameForExactPC(pc); ok { + return lineSym + } + if lineSym, ok := pcLineFrameForExactPC(pc - 1); ok { + lineSym.pc = pc + return lineSym + } + sym := addrInfoSymbol(pc) if lineSym, ok := pcLineFrameForPC(pc, sym.entry); ok { return mergePCLineSymbol(sym, lineSym) } @@ -745,6 +1020,11 @@ func frameSymbol(pc uintptr) 
pcSymbol { return callSym } } + if !sym.ok { + if funcSym, ok := funcPCFrameForPC(pc); ok { + return funcSym + } + } if frame, ok := rtdebug.FrameForPC(pc); ok { return pcSymbol{ pc: pc, From 734368f73a3d9f8f65f46150eff0a1c9cfff7c8d Mon Sep 17 00:00:00 2001 From: Li Jie Date: Tue, 30 Jun 2026 23:20:25 +0800 Subject: [PATCH 10/59] runtime: slim FuncForPC cache hot path --- .../lib/runtime/pprof_runtime_stub_llgo.go | 32 ++++++++++++------- 1 file changed, 21 insertions(+), 11 deletions(-) diff --git a/runtime/internal/lib/runtime/pprof_runtime_stub_llgo.go b/runtime/internal/lib/runtime/pprof_runtime_stub_llgo.go index aa69889194..ec0b91983e 100644 --- a/runtime/internal/lib/runtime/pprof_runtime_stub_llgo.go +++ b/runtime/internal/lib/runtime/pprof_runtime_stub_llgo.go @@ -3,6 +3,8 @@ package runtime import ( + "unsafe" + llrt "github.com/goplus/llgo/runtime/internal/runtime" ) @@ -94,11 +96,24 @@ type funcForPCCacheEntry struct { } var funcForPCCache [funcForPCCacheSize]funcForPCCacheEntry +var funcForPCLast funcForPCCacheEntry func FuncForPC(pc uintptr) *Func { - if fn := cachedFuncForPC(pc); fn != nil { + if fn := funcForPCLast.fn; fn != nil && funcForPCLast.pc == pc { + return fn + } + entry := (*funcForPCCacheEntry)(unsafe.Add( + unsafe.Pointer(&funcForPCCache[0]), + funcForPCCacheIndex(pc)*unsafe.Sizeof(funcForPCCacheEntry{}), + )) + if fn := entry.fn; fn != nil && entry.pc == pc { + funcForPCLast = funcForPCCacheEntry{pc: pc, fn: fn} return fn } + return funcForPCSlow(pc) +} + +func funcForPCSlow(pc uintptr) *Func { if sym, ok := funcPCFrameForPC(pc); ok { fn := newFuncForPC(pc, sym) cacheFuncForPC(pc, fn) @@ -131,19 +146,14 @@ func newFuncForPC(pc uintptr, sym pcSymbol) *Func { } } -func cachedFuncForPC(pc uintptr) *Func { - entry := &funcForPCCache[funcForPCCacheIndex(pc)] - fn := entry.fn - if fn != nil && entry.pc == pc && fn.pc == pc { - return fn - } - return nil -} - func cacheFuncForPC(pc uintptr, fn *Func) { - entry := 
&funcForPCCache[funcForPCCacheIndex(pc)] + entry := (*funcForPCCacheEntry)(unsafe.Add( + unsafe.Pointer(&funcForPCCache[0]), + funcForPCCacheIndex(pc)*unsafe.Sizeof(funcForPCCacheEntry{}), + )) entry.fn = fn entry.pc = pc + funcForPCLast = funcForPCCacheEntry{pc: pc, fn: fn} } func funcForPCCacheIndex(pc uintptr) uintptr { From 85b1c13cce032c1415d14345660f95c3d690f32b Mon Sep 17 00:00:00 2001 From: Li Jie Date: Tue, 30 Jun 2026 23:51:24 +0800 Subject: [PATCH 11/59] cl: make pc-line labels clone-safe --- cl/caller_frame_test.go | 1 + cl/instr.go | 5 +++-- ssa/ssa_test.go | 34 ++++++++++++++++++++++++++++++++++ 3 files changed, 38 insertions(+), 2 deletions(-) diff --git a/cl/caller_frame_test.go b/cl/caller_frame_test.go index d3b53cf582..00bad87fed 100644 --- a/cl/caller_frame_test.go +++ b/cl/caller_frame_test.go @@ -363,6 +363,7 @@ func leaf() {} `!"example.com/foo.top"`, `!"caller_frame_compile.go"`, "__llgo_pcsite_", + "${:uid}", `.pushsection llgo_pcline`, `.quad __llgo_pcsite_`, } { diff --git a/cl/instr.go b/cl/instr.go index be19636a60..6db43beaea 100644 --- a/cl/instr.go +++ b/cl/instr.go @@ -1381,6 +1381,7 @@ func (p *context) emitPCLineLabel(b llssa.Builder, pos token.Pos) { p.pcLineSeq++ id := pcLineID(p.fn.Name(), p.pcLineSeq) label := pcLineLabelName(id) + asmLabel := label + "_${:uid}" ptrDirective := ".quad" align := "3" if p.prog.PointerSize() == 4 { @@ -1388,10 +1389,10 @@ func (p *context) emitPCLineLabel(b llssa.Builder, pos token.Pos) { align = "2" } b.InlineAsm( - label + ":\n" + + asmLabel + ":\n" + ".pushsection llgo_pcline,\"ao\",@progbits," + asmQuoteSymbol(p.fn.Name()) + "\n" + ".p2align " + align + "\n" + - ptrDirective + " " + label + "\n" + + ptrDirective + " " + asmLabel + "\n" + ".quad " + uint64Hex(id) + "\n" + ".popsection", ) diff --git a/ssa/ssa_test.go b/ssa/ssa_test.go index 378bde70ff..1adb41cf88 100644 --- a/ssa/ssa_test.go +++ b/ssa/ssa_test.go @@ -204,6 +204,40 @@ func TestFuncInfoMetadataDoesNotPreserveFunctions(t 
*testing.T) { testFuncInfoMetadataDoesNotPreserveFunctions(t) } +func TestPCLineMetadataEmission(t *testing.T) { + prog := NewProgram(nil) + pkg := prog.NewPackage("main", "main") + + pkg.EmitPCLineInfo(0, "ignored", "ignored.go", -1, -1) + pkg.EmitPCLineInfo(0x1234, "", "ignored.go", -1, -1) + if ir := pkg.String(); strings.Contains(ir, PCLineMetadataName) { + t.Fatalf("invalid pcline rows should not emit metadata:\n%s", ir) + } + + pkg.EmitPCLineInfo(0x1234, "main.live", "call.go", 23, 5) + pkg.EmitPCLineInfo(0x5678, "main.negative", "negative.go", -7, -1) + ir := pkg.String() + for _, want := range []string{ + `!llgo.pcline = !{!`, + `i64 4660`, + `!"main.live"`, + `!"call.go"`, + `i32 23`, + `i32 5`, + `i64 22136`, + `!"main.negative"`, + `!"negative.go"`, + `i32 0`, + } { + if !strings.Contains(ir, want) { + t.Fatalf("missing pcline field %s:\n%s", want, ir) + } + } + if strings.Contains(ir, `ptr @main.live`) || strings.Contains(ir, `ptr @"main.live"`) { + t.Fatalf("pcline metadata must not contain function pointer operands:\n%s", ir) + } +} + func testFuncInfoMetadataDoesNotPreserveFunctions(t *testing.T) { t.Helper() From b33a774d2c210094382149162a57fb698aba5407 Mon Sep 17 00:00:00 2001 From: Li Jie Date: Wed, 1 Jul 2026 08:39:42 +0800 Subject: [PATCH 12/59] runtime: guard funcinfo table initialization --- internal/build/funcinfo/funcinfo.go | 2 + internal/build/funcinfo_table.go | 5 ++ internal/build/funcinfo_table_test.go | 3 + runtime/internal/lib/runtime/symtab.go | 65 ++++++++++++++++--- test/go/runtime_lineinfo_stack_test.go | 87 ++++++++++++++++++++++++++ 5 files changed, 155 insertions(+), 7 deletions(-) diff --git a/internal/build/funcinfo/funcinfo.go b/internal/build/funcinfo/funcinfo.go index 10dc6aab6c..76c7cc123c 100644 --- a/internal/build/funcinfo/funcinfo.go +++ b/internal/build/funcinfo/funcinfo.go @@ -253,6 +253,8 @@ func buildHash(records []Record) ([]uint16, error) { return nil, nil } if len(records) > math.MaxUint16 { + // Runtime hash 
slots store 1-based uint16 record indexes. Larger + // tables remain correct by omitting the hash and using linear lookup. return nil, nil } buckets := 1 diff --git a/internal/build/funcinfo_table.go b/internal/build/funcinfo_table.go index 680fe604e3..727f064b3b 100644 --- a/internal/build/funcinfo_table.go +++ b/internal/build/funcinfo_table.go @@ -31,6 +31,7 @@ const ( funcInfoCountSymbol = "__llgo_funcinfo_count" funcInfoStringsSymbol = "__llgo_funcinfo_strings" funcInfoStringOffsetsSymbol = "__llgo_funcinfo_string_offsets" + funcInfoStringCountSymbol = "__llgo_funcinfo_string_count" funcInfoHashSymbol = "__llgo_funcinfo_hash" funcInfoHashMaskSymbol = "__llgo_funcinfo_hash_mask" pcLineTableSymbol = "__llgo_pcline_table" @@ -228,6 +229,7 @@ func emitFuncInfoTable(ctx *context, pkg llssa.Package, records []funcInfoRecord pcSiteEndPtr := llvm.AddGlobal(mod, llvm.PointerType(pcSiteRecordType, 0), pcSiteEndPtrSymbol) stringsPtr := llvm.AddGlobal(mod, llvm.PointerType(i8Type, 0), funcInfoStringsSymbol) stringOffsetsPtr := llvm.AddGlobal(mod, llvm.PointerType(i32Type, 0), funcInfoStringOffsetsSymbol) + stringCount := llvm.AddGlobal(mod, countType, funcInfoStringCountSymbol) hashPtr := llvm.AddGlobal(mod, llvm.PointerType(i16Type, 0), funcInfoHashSymbol) count := llvm.AddGlobal(mod, countType, funcInfoCountSymbol) pcLineCount := llvm.AddGlobal(mod, countType, pcLineCountSymbol) @@ -239,6 +241,7 @@ func emitFuncInfoTable(ctx *context, pkg llssa.Package, records []funcInfoRecord pcSiteEndPtr.SetInitializer(llvm.ConstPointerNull(pcSiteEndPtr.GlobalValueType())) stringsPtr.SetInitializer(llvm.ConstPointerNull(stringsPtr.GlobalValueType())) stringOffsetsPtr.SetInitializer(llvm.ConstPointerNull(stringOffsetsPtr.GlobalValueType())) + stringCount.SetInitializer(llvm.ConstInt(countType, 0, false)) hashPtr.SetInitializer(llvm.ConstPointerNull(hashPtr.GlobalValueType())) count.SetInitializer(llvm.ConstInt(countType, 0, false)) pcLineCount.SetInitializer(llvm.ConstInt(countType, 
0, false)) @@ -257,6 +260,7 @@ func emitFuncInfoTable(ctx *context, pkg llssa.Package, records []funcInfoRecord pcSiteEndPtr.SetInitializer(llvm.ConstPointerNull(pcSiteEndPtr.GlobalValueType())) stringsPtr.SetInitializer(llvm.ConstPointerNull(stringsPtr.GlobalValueType())) stringOffsetsPtr.SetInitializer(llvm.ConstPointerNull(stringOffsetsPtr.GlobalValueType())) + stringCount.SetInitializer(llvm.ConstInt(countType, 0, false)) hashPtr.SetInitializer(llvm.ConstPointerNull(hashPtr.GlobalValueType())) count.SetInitializer(llvm.ConstInt(countType, 0, false)) pcLineCount.SetInitializer(llvm.ConstInt(countType, 0, false)) @@ -355,6 +359,7 @@ func emitFuncInfoTable(ctx *context, pkg llssa.Package, records []funcInfoRecord llvm.ConstInt(countType, 0, false), llvm.ConstInt(countType, 0, false), })) + stringCount.SetInitializer(llvm.ConstInt(countType, uint64(len(encoded.StringOffsets)), false)) if len(encoded.Hash) == 0 { hashPtr.SetInitializer(llvm.ConstPointerNull(hashPtr.GlobalValueType())) hashMask.SetInitializer(llvm.ConstInt(countType, 0, false)) diff --git a/internal/build/funcinfo_table_test.go b/internal/build/funcinfo_table_test.go index 6371a0ec4e..8f749818e6 100644 --- a/internal/build/funcinfo_table_test.go +++ b/internal/build/funcinfo_table_test.go @@ -60,6 +60,7 @@ func TestFuncInfoTableMaterializesMetadataWithoutFunctionPointers(t *testing.T) "@__llgo_pcsite_end = global ptr null", "@__llgo_funcinfo_strings = global ptr", "@__llgo_funcinfo_string_offsets = global ptr", + "@__llgo_funcinfo_string_count = global i64 5", "@__llgo_funcinfo_hash = global ptr", "@__llgo_funcinfo_count = global i64 1", "@__llgo_pcline_count = global i64 0", @@ -116,6 +117,7 @@ func TestFuncInfoTableMaterializesPCLineMetadata(t *testing.T) { "@__llgo_pcsite_start = global ptr @__start_llgo_pcline", "@__llgo_pcsite_end = global ptr @__stop_llgo_pcline", "@__llgo_pcline_count = global i64 1", + "@__llgo_funcinfo_string_count = global i64 6", "module asm \".section llgo_pcline", 
`@"__llgo_pcline_table$data" = private unnamed_addr constant [1 x { i64, i32, i32, i32 }]`, "i64 4660", @@ -199,6 +201,7 @@ func TestFuncInfoTableEmptyDefinitions(t *testing.T) { "@__llgo_pcsite_end = global ptr null", "@__llgo_funcinfo_strings = global ptr null", "@__llgo_funcinfo_string_offsets = global ptr null", + "@__llgo_funcinfo_string_count = global i64 0", "@__llgo_funcinfo_hash = global ptr null", "@__llgo_funcinfo_count = global i64 0", "@__llgo_pcline_count = global i64 0", diff --git a/runtime/internal/lib/runtime/symtab.go b/runtime/internal/lib/runtime/symtab.go index ce9ef6dc7f..1936d5fdd3 100644 --- a/runtime/internal/lib/runtime/symtab.go +++ b/runtime/internal/lib/runtime/symtab.go @@ -9,6 +9,7 @@ import ( c "github.com/goplus/llgo/runtime/internal/clite" clitedebug "github.com/goplus/llgo/runtime/internal/clite/debug" + latomic "github.com/goplus/llgo/runtime/internal/lib/sync/atomic" rtdebug "github.com/goplus/llgo/runtime/internal/runtime" ) @@ -153,6 +154,9 @@ var runtimeFuncInfoStrings *c.Char //go:linkname runtimeFuncInfoStringOffsets __llgo_funcinfo_string_offsets var runtimeFuncInfoStringOffsets *uint32 +//go:linkname runtimeFuncInfoStringCount __llgo_funcinfo_string_count +var runtimeFuncInfoStringCount uintptr + //go:linkname runtimeFuncInfoHash __llgo_funcinfo_hash var runtimeFuncInfoHash *uint16 @@ -195,7 +199,7 @@ type runtimePCLineFrame struct { startLine int } -var runtimePCLineInit bool +var runtimePCLineInitState uint32 var runtimePCLineFrames []runtimePCLineFrame type runtimeFuncPCFrame struct { @@ -213,11 +217,17 @@ type runtimePCPageIndex struct { const runtimeFuncPCPageShift = 12 -var runtimeFuncPCInit bool +var runtimeFuncPCInitState uint32 var runtimeFuncPCFrames []runtimeFuncPCFrame var runtimeFuncPCEntries []uintptr var runtimeFuncPCIndex runtimePCPageIndex +const ( + runtimeFuncInfoInitUninit uint32 = iota + runtimeFuncInfoInitDone + runtimeFuncInfoInitBusy +) + func hasStringPrefix(s, prefix string) bool { if len(s) < 
len(prefix) { return false @@ -305,7 +315,8 @@ func cStringAppend(dst []byte, cstr *c.Char) []byte { } func funcInfoCString(id uint16) *c.Char { - if runtimeFuncInfoStrings == nil || runtimeFuncInfoStringOffsets == nil { + if runtimeFuncInfoStrings == nil || runtimeFuncInfoStringOffsets == nil || + uintptr(id) >= runtimeFuncInfoStringCount { return nil } off := *(*uint32)(unsafe.Add(unsafe.Pointer(runtimeFuncInfoStringOffsets), uintptr(id)*unsafe.Sizeof(*runtimeFuncInfoStringOffsets))) @@ -499,10 +510,30 @@ func addrInfoSymbol(pc uintptr) pcSymbol { } func initRuntimeFuncPCFrames() { - if runtimeFuncPCInit { + if latomic.LoadUint32(&runtimeFuncPCInitState) == runtimeFuncInfoInitDone { return } - runtimeFuncPCInit = true + initRuntimeFuncPCFramesSlow() +} + +func initRuntimeFuncPCFramesSlow() { + for { + state := latomic.LoadUint32(&runtimeFuncPCInitState) + switch state { + case runtimeFuncInfoInitDone: + return + case runtimeFuncInfoInitUninit: + if latomic.CompareAndSwapUint32(&runtimeFuncPCInitState, runtimeFuncInfoInitUninit, runtimeFuncInfoInitBusy) { + initRuntimeFuncPCFramesOnce() + latomic.StoreUint32(&runtimeFuncPCInitState, runtimeFuncInfoInitDone) + return + } + } + c.Usleep(1) + } +} + +func initRuntimeFuncPCFramesOnce() { if runtimeFuncInfoTable == nil || runtimeFuncInfoCount == 0 || runtimeFuncInfoStrings == nil || @@ -710,10 +741,30 @@ func funcPCFrameForPC(pc uintptr) (pcSymbol, bool) { } func initRuntimePCLineFrames() { - if runtimePCLineInit { + if latomic.LoadUint32(&runtimePCLineInitState) == runtimeFuncInfoInitDone { return } - runtimePCLineInit = true + initRuntimePCLineFramesSlow() +} + +func initRuntimePCLineFramesSlow() { + for { + state := latomic.LoadUint32(&runtimePCLineInitState) + switch state { + case runtimeFuncInfoInitDone: + return + case runtimeFuncInfoInitUninit: + if latomic.CompareAndSwapUint32(&runtimePCLineInitState, runtimeFuncInfoInitUninit, runtimeFuncInfoInitBusy) { + initRuntimePCLineFramesOnce() + 
latomic.StoreUint32(&runtimePCLineInitState, runtimeFuncInfoInitDone) + return + } + } + c.Usleep(1) + } +} + +func initRuntimePCLineFramesOnce() { if runtimePCLineTable == nil || runtimePCLineCount == 0 || runtimePCSiteStart == nil || diff --git a/test/go/runtime_lineinfo_stack_test.go b/test/go/runtime_lineinfo_stack_test.go index e9c9bf7334..a9e95bdbc3 100644 --- a/test/go/runtime_lineinfo_stack_test.go +++ b/test/go/runtime_lineinfo_stack_test.go @@ -175,6 +175,93 @@ func TestRuntimeLineInfoAndStack(t *testing.T) { } } +const runtimeFuncInfoConcurrentFirstUseProbe = `package main + +import ( + "runtime" + "strconv" + "strings" + "sync" +) + +func main() { + const n = 32 + start := make(chan struct{}) + errc := make(chan string, n) + var wg sync.WaitGroup + for i := 0; i < n; i++ { + wg.Add(1) + go func() { + defer wg.Done() + <-start + errc <- checkRuntimeInfo() + }() + } + close(start) + wg.Wait() + close(errc) + for err := range errc { + if err != "" { + panic(err) + } + } +} + +//go:noinline +func checkRuntimeInfo() string { + pc, file, line, ok := runtime.Caller(0) // CONCURRENT_CALLER_MARK + if !ok || !strings.HasSuffix(file, "main.go") || line != CONCURRENT_CALLER_LINE { + return "bad caller: " + file + ":" + strconv.Itoa(line) + } + fn := runtime.FuncForPC(pc) + if fn == nil || fn.Name() != "main.checkRuntimeInfo" { + name := "" + if fn != nil { + name = fn.Name() + } + return "bad func: " + name + } + file, line = fn.FileLine(pc) + if !strings.HasSuffix(file, "main.go") || line != CONCURRENT_CALLER_LINE { + return "bad fileline: " + file + ":" + strconv.Itoa(line) + } + var pcs [8]uintptr + n := runtime.Callers(0, pcs[:]) + frames := runtime.CallersFrames(pcs[:n]) + for { + frame, more := frames.Next() + if frame.Function == "main.checkRuntimeInfo" { + if !strings.HasSuffix(frame.File, "main.go") || frame.Line == 0 { + return "bad frame: " + frame.File + ":" + strconv.Itoa(frame.Line) + } + return "" + } + if !more { + return "missing frame" + } + } +} 
+` + +func TestRuntimeFuncInfoConcurrentFirstUse(t *testing.T) { + source := runtimeFuncInfoConcurrentFirstUseProbe + source = strings.ReplaceAll(source, "CONCURRENT_CALLER_LINE", strconv.Itoa(markerLine(source, "CONCURRENT_CALLER_MARK"))) + + dir := t.TempDir() + file := filepath.Join(dir, "main.go") + if err := os.WriteFile(file, []byte(source), 0644); err != nil { + t.Fatal(err) + } + + repoRoot := findStringConversionRepoRoot(t) + t.Setenv("LLGO_ROOT", repoRoot) + cmd := exec.Command("go", "run", "./cmd/llgo", "run", "-a", file) + cmd.Dir = repoRoot + if out, err := cmd.CombinedOutput(); err != nil { + t.Fatalf("llgo concurrent funcinfo probe failed: %v\n%s", err, out) + } +} + func markerLine(source, marker string) int { line := 1 for _, part := range strings.SplitAfter(source, "\n") { From 8e6f33cf181a80a28ddcf112bb8f0a66d8a23e3e Mon Sep 17 00:00:00 2001 From: Li Jie Date: Wed, 1 Jul 2026 11:24:28 +0800 Subject: [PATCH 13/59] runtime: fix funcinfo entry pc line metadata --- cl/compile.go | 19 ++++++- cl/funcinfo_metadata_test.go | 9 ++++ internal/build/build.go | 2 + internal/build/funcinfo_table.go | 75 +++++++++++++++++++++++++- internal/build/funcinfo_table_test.go | 45 ++++++++++++++++ internal/build/main_module.go | 3 +- runtime/internal/lib/runtime/symtab.go | 61 ++++++++++++++++++++- test/go/runtime_lineinfo_stack_test.go | 54 +++++++++++++++++-- 8 files changed, 259 insertions(+), 9 deletions(-) diff --git a/cl/compile.go b/cl/compile.go index 443a0cc9cb..f3d2c21338 100644 --- a/cl/compile.go +++ b/cl/compile.go @@ -565,7 +565,7 @@ func (p *context) compileFuncDecl(pkg llssa.Package, f *ssa.Function) (llssa.Fun if pkgTypes != nil { goName = funcName(pkgTypes, f, false) } - pos := p.goProg.Fset.Position(f.Pos()) + pos := p.funcInfoPosition(f) pkg.EmitFuncInfo(fn.Name(), goName, pos.Filename, pos.Line, pos.Column) } var childInits []func() @@ -701,6 +701,23 @@ func (p *context) getFuncBodyPos(f *ssa.Function) token.Position { return 
p.goProg.Fset.Position(f.Pos()) } +func (p *context) funcInfoPosition(f *ssa.Function) token.Position { + if f != nil { + switch syntax := f.Syntax().(type) { + case *ast.FuncDecl: + if syntax.Body != nil && len(syntax.Body.List) != 0 { + return p.goProg.Fset.Position(syntax.Body.List[0].Pos()) + } + case *ast.FuncLit: + if syntax.Body != nil && len(syntax.Body.List) != 0 { + return p.goProg.Fset.Position(syntax.Body.List[0].Pos()) + } + } + return p.goProg.Fset.Position(f.Pos()) + } + return token.Position{} +} + func isGlobal(v *types.Var) bool { // TODO(lijie): better implementation return strings.HasPrefix(v.Parent().String(), "package ") diff --git a/cl/funcinfo_metadata_test.go b/cl/funcinfo_metadata_test.go index 5319b16751..902813cfad 100644 --- a/cl/funcinfo_metadata_test.go +++ b/cl/funcinfo_metadata_test.go @@ -92,6 +92,15 @@ func (T) method() {} if got := records["foo.top"].name; got != "foo.top" { t.Fatalf("caller stack frame name = %q, want foo.top", got) } + if got := records["foo.top"].line; got != 6 { + t.Fatalf("top funcinfo line = %d, want first body statement line 6", got) + } + if got := records["foo.leaf"].line; got != 9 { + t.Fatalf("leaf funcinfo line = %d, want line 9", got) + } + if got := records["foo.T.method"].line; got != 11 { + t.Fatalf("empty method funcinfo line = %d, want declaration line 11", got) + } } func TestNoInlineDirectiveDisablesTailCalls(t *testing.T) { diff --git a/internal/build/build.go b/internal/build/build.go index ea58216d85..e6c2501361 100644 --- a/internal/build/build.go +++ b/internal/build/build.go @@ -1046,6 +1046,7 @@ func linkMainPkg(ctx *context, pkg *packages.Package, pkgs []*aPackage, outputPa // Use a stable synthetic name to avoid confusing it with the real main package in traces/logs. 
funcInfo := prepareFuncInfoTableRecords(collectFuncInfo(linkedOrder), nil) pcLineInfo := collectPCLineInfo(linkedOrder) + funcInfoStubs := collectFuncInfoStubIndexes(linkedOrder, funcInfo) entryPkg := genMainModule(ctx, llssa.PkgRuntime, pkg, &genConfig{ rtInit: needRuntime, pyInit: needPyInit, @@ -1055,6 +1056,7 @@ func linkMainPkg(ctx *context, pkg *packages.Package, pkgs []*aPackage, outputPa abiSymbols: linkedModuleGlobals(linkedOrder), funcInfo: funcInfo, pcLineInfo: pcLineInfo, + funcInfoStubs: funcInfoStubs, }) entryObjFile, err := exportObject(ctx, "entry_main", entryPkg.ExportFile, entryPkg.LPkg) if err != nil { diff --git a/internal/build/funcinfo_table.go b/internal/build/funcinfo_table.go index 727f064b3b..64c2b12385 100644 --- a/internal/build/funcinfo_table.go +++ b/internal/build/funcinfo_table.go @@ -34,6 +34,8 @@ const ( funcInfoStringCountSymbol = "__llgo_funcinfo_string_count" funcInfoHashSymbol = "__llgo_funcinfo_hash" funcInfoHashMaskSymbol = "__llgo_funcinfo_hash_mask" + funcInfoStubIndexesSymbol = "__llgo_funcinfo_stub_indexes" + funcInfoStubCountSymbol = "__llgo_funcinfo_stub_count" pcLineTableSymbol = "__llgo_pcline_table" pcLineCountSymbol = "__llgo_pcline_count" pcSiteStartPtrSymbol = "__llgo_pcsite_start" @@ -45,6 +47,8 @@ const ( funcInfoStringsDataSymbol = "__llgo_funcinfo_strings$data" funcInfoStringOffsetsDataSymbol = "__llgo_funcinfo_string_offsets$data" funcInfoHashDataSymbol = "__llgo_funcinfo_hash$data" + funcInfoStubIndexesDataSymbol = "__llgo_funcinfo_stub_indexes$data" + closureStubPrefix = "__llgo_stub." 
) type funcInfoRecord struct { @@ -121,6 +125,45 @@ func collectPCLineInfo(pkgs []Package) []pcLineRecord { return out } +func collectFuncInfoStubIndexes(pkgs []Package, records []funcInfoRecord) []uint32 { + if len(records) == 0 { + return nil + } + recordBySymbol := make(map[string]uint32, len(records)) + for i, rec := range records { + if rec.symbol != "" { + recordBySymbol[rec.symbol] = uint32(i + 1) + } + } + seen := make(map[uint32]none) + for _, pkg := range pkgs { + if pkg == nil || pkg.LPkg == nil { + continue + } + fn := pkg.LPkg.Module().FirstFunction() + for !fn.IsNil() { + name := fn.Name() + if target, ok := strings.CutPrefix(name, closureStubPrefix); ok { + if idx := recordBySymbol[target]; idx != 0 { + seen[idx] = none{} + } + } + fn = llvm.NextFunction(fn) + } + } + if len(seen) == 0 { + return nil + } + out := make([]uint32, 0, len(seen)) + for idx := range seen { + out = append(out, idx) + } + sort.Slice(out, func(i, j int) bool { + return out[i] < out[j] + }) + return out +} + func prepareFuncInfoTableRecords(records []funcInfoRecord, liveSymbols map[string]none) []funcInfoRecord { if len(records) == 0 { return nil @@ -195,7 +238,7 @@ func readPCLineInfo(mod llvm.Module) []pcLineRecord { return out } -func emitFuncInfoTable(ctx *context, pkg llssa.Package, records []funcInfoRecord, pcLines []pcLineRecord) { +func emitFuncInfoTable(ctx *context, pkg llssa.Package, records []funcInfoRecord, pcLines []pcLineRecord, stubIndexes []uint32) { mod := pkg.Module() llvmCtx := mod.Context() i8Type := llvmCtx.Int8Type() @@ -232,6 +275,8 @@ func emitFuncInfoTable(ctx *context, pkg llssa.Package, records []funcInfoRecord stringCount := llvm.AddGlobal(mod, countType, funcInfoStringCountSymbol) hashPtr := llvm.AddGlobal(mod, llvm.PointerType(i16Type, 0), funcInfoHashSymbol) count := llvm.AddGlobal(mod, countType, funcInfoCountSymbol) + stubIndexesPtr := llvm.AddGlobal(mod, llvm.PointerType(i32Type, 0), funcInfoStubIndexesSymbol) + stubCount := 
llvm.AddGlobal(mod, countType, funcInfoStubCountSymbol) pcLineCount := llvm.AddGlobal(mod, countType, pcLineCountSymbol) hashMask := llvm.AddGlobal(mod, countType, funcInfoHashMaskSymbol) if len(records) == 0 && len(pcLines) == 0 { @@ -244,6 +289,8 @@ func emitFuncInfoTable(ctx *context, pkg llssa.Package, records []funcInfoRecord stringCount.SetInitializer(llvm.ConstInt(countType, 0, false)) hashPtr.SetInitializer(llvm.ConstPointerNull(hashPtr.GlobalValueType())) count.SetInitializer(llvm.ConstInt(countType, 0, false)) + stubIndexesPtr.SetInitializer(llvm.ConstPointerNull(stubIndexesPtr.GlobalValueType())) + stubCount.SetInitializer(llvm.ConstInt(countType, 0, false)) pcLineCount.SetInitializer(llvm.ConstInt(countType, 0, false)) hashMask.SetInitializer(llvm.ConstInt(countType, 0, false)) return @@ -263,6 +310,8 @@ func emitFuncInfoTable(ctx *context, pkg llssa.Package, records []funcInfoRecord stringCount.SetInitializer(llvm.ConstInt(countType, 0, false)) hashPtr.SetInitializer(llvm.ConstPointerNull(hashPtr.GlobalValueType())) count.SetInitializer(llvm.ConstInt(countType, 0, false)) + stubIndexesPtr.SetInitializer(llvm.ConstPointerNull(stubIndexesPtr.GlobalValueType())) + stubCount.SetInitializer(llvm.ConstInt(countType, 0, false)) pcLineCount.SetInitializer(llvm.ConstInt(countType, 0, false)) hashMask.SetInitializer(llvm.ConstInt(countType, 0, false)) return @@ -382,6 +431,30 @@ func emitFuncInfoTable(ctx *context, pkg llssa.Package, records []funcInfoRecord hashMask.SetInitializer(llvm.ConstInt(countType, uint64(len(encoded.Hash)-1), false)) } count.SetInitializer(llvm.ConstInt(countType, uint64(len(encoded.Records)), false)) + stubIndexValues := make([]llvm.Value, 0, len(stubIndexes)) + for _, idx := range stubIndexes { + if idx == 0 || int(idx) > len(encoded.Records) { + continue + } + stubIndexValues = append(stubIndexValues, llvm.ConstInt(i32Type, uint64(idx), false)) + } + if len(stubIndexValues) == 0 { + 
stubIndexesPtr.SetInitializer(llvm.ConstPointerNull(stubIndexesPtr.GlobalValueType())) + stubCount.SetInitializer(llvm.ConstInt(countType, 0, false)) + } else { + stubIndexArrayType := llvm.ArrayType(i32Type, len(stubIndexValues)) + stubIndexData := llvm.AddGlobal(mod, stubIndexArrayType, funcInfoStubIndexesDataSymbol) + stubIndexData.SetInitializer(llvm.ConstArray(i32Type, stubIndexValues)) + stubIndexData.SetLinkage(llvm.PrivateLinkage) + stubIndexData.SetGlobalConstant(true) + stubIndexData.SetUnnamedAddr(true) + stubIndexData.SetAlignment(4) + stubIndexesPtr.SetInitializer(llvm.ConstInBoundsGEP(stubIndexArrayType, stubIndexData, []llvm.Value{ + llvm.ConstInt(countType, 0, false), + llvm.ConstInt(countType, 0, false), + })) + stubCount.SetInitializer(llvm.ConstInt(countType, uint64(len(stubIndexValues)), false)) + } } func shouldEmitRuntimeELFSites(ctx *context) bool { diff --git a/internal/build/funcinfo_table_test.go b/internal/build/funcinfo_table_test.go index 8f749818e6..1a58825e51 100644 --- a/internal/build/funcinfo_table_test.go +++ b/internal/build/funcinfo_table_test.go @@ -63,6 +63,8 @@ func TestFuncInfoTableMaterializesMetadataWithoutFunctionPointers(t *testing.T) "@__llgo_funcinfo_string_count = global i64 5", "@__llgo_funcinfo_hash = global ptr", "@__llgo_funcinfo_count = global i64 1", + "@__llgo_funcinfo_stub_indexes = global ptr null", + "@__llgo_funcinfo_stub_count = global i64 0", "@__llgo_pcline_count = global i64 0", "@__llgo_funcinfo_hash_mask = global i64 1", `@"__llgo_funcinfo_table$data" = private unnamed_addr constant [1 x { i16, i16, i16, i16, i16, i16, i32 }]`, @@ -83,6 +85,47 @@ func TestFuncInfoTableMaterializesMetadataWithoutFunctionPointers(t *testing.T) } } +func TestFuncInfoTableMaterializesClosureStubIndexes(t *testing.T) { + prog := llssa.NewProgram(nil) + src := prog.NewPackage("example.com/p", "example.com/p") + src.EmitFuncInfo("example.com/p.live", "example.com/p.Live", "live.go", 17, 3) + 
src.EmitFuncInfo("example.com/p.other", "example.com/p.Other", "other.go", 23, 1) + src.NewFunc(closureStubPrefix+"example.com/p.live", llssa.NoArgsNoRet, llssa.InC) + + records := collectFuncInfo([]Package{{LPkg: src}}) + stubs := collectFuncInfoStubIndexes([]Package{{LPkg: src}}, records) + if len(stubs) != 1 || records[stubs[0]-1].symbol != "example.com/p.live" { + t.Fatalf("stub indexes = %+v for records %+v, want live", stubs, records) + } + + ctx := &context{ + prog: prog, + buildConf: &Config{ + BuildMode: BuildModeExe, + Goos: "linux", + Goarch: "amd64", + }, + } + entry := genMainModule(ctx, llssa.PkgRuntime, &packages.Package{ + PkgPath: "example.com/main", + ExportFile: "main.a", + }, &genConfig{funcInfo: records, funcInfoStubs: stubs}) + ir := entry.LPkg.String() + for _, want := range []string{ + "@__llgo_funcinfo_stub_indexes = global ptr", + "@__llgo_funcinfo_stub_count = global i64 1", + `@"__llgo_funcinfo_stub_indexes$data" = private unnamed_addr constant [1 x i32]`, + "@__llgo_funcinfo_count = global i64 2", + } { + if !strings.Contains(ir, want) { + t.Fatalf("funcinfo stub index table IR missing %q:\n%s", want, ir) + } + } + if strings.Contains(ir, closureStubPrefix) { + t.Fatalf("stub index table should not add stub symbol strings:\n%s", ir) + } +} + func TestFuncInfoTableMaterializesPCLineMetadata(t *testing.T) { prog := llssa.NewProgram(nil) src := prog.NewPackage("example.com/p", "example.com/p") @@ -204,6 +247,8 @@ func TestFuncInfoTableEmptyDefinitions(t *testing.T) { "@__llgo_funcinfo_string_count = global i64 0", "@__llgo_funcinfo_hash = global ptr null", "@__llgo_funcinfo_count = global i64 0", + "@__llgo_funcinfo_stub_indexes = global ptr null", + "@__llgo_funcinfo_stub_count = global i64 0", "@__llgo_pcline_count = global i64 0", "@__llgo_funcinfo_hash_mask = global i64 0", } { diff --git a/internal/build/main_module.go b/internal/build/main_module.go index 83289dd23f..6992bcec73 100644 --- a/internal/build/main_module.go +++ 
b/internal/build/main_module.go @@ -45,6 +45,7 @@ type genConfig struct { abiSymbols map[string]none funcInfo []funcInfoRecord pcLineInfo []pcLineRecord + funcInfoStubs []uint32 } // genMainModule generates the main entry module for an llgo program. @@ -62,7 +63,7 @@ func genMainModule(ctx *context, rtPkgPath string, pkg *packages.Package, cfg *g argvValueType := prog.Pointer(prog.CStr()) argvVar := mainPkg.NewVarEx("__llgo_argv", prog.Pointer(argvValueType)) argvVar.InitNil() - emitFuncInfoTable(ctx, mainPkg, cfg.funcInfo, cfg.pcLineInfo) + emitFuncInfoTable(ctx, mainPkg, cfg.funcInfo, cfg.pcLineInfo, cfg.funcInfoStubs) exportFile := pkg.ExportFile if exportFile == "" { diff --git a/runtime/internal/lib/runtime/symtab.go b/runtime/internal/lib/runtime/symtab.go index 1936d5fdd3..5dac41d161 100644 --- a/runtime/internal/lib/runtime/symtab.go +++ b/runtime/internal/lib/runtime/symtab.go @@ -166,6 +166,12 @@ var runtimeFuncInfoCount uintptr //go:linkname runtimeFuncInfoHashMask __llgo_funcinfo_hash_mask var runtimeFuncInfoHashMask uintptr +//go:linkname runtimeFuncInfoStubIndexes __llgo_funcinfo_stub_indexes +var runtimeFuncInfoStubIndexes *uint32 + +//go:linkname runtimeFuncInfoStubCount __llgo_funcinfo_stub_count +var runtimeFuncInfoStubCount uintptr + type runtimePCLineRecord struct { id uint64 funcIndex uint32 @@ -226,6 +232,8 @@ const ( runtimeFuncInfoInitUninit uint32 = iota runtimeFuncInfoInitDone runtimeFuncInfoInitBusy + runtimeClosureStubPrefix = "__llgo_stub." + runtimePublicClosureStubPrefix = "_llgo_stub." 
) func hasStringPrefix(s, prefix string) bool { @@ -333,6 +341,11 @@ func pcLineAt(i uintptr) *runtimePCLineRecord { return (*runtimePCLineRecord)(unsafe.Add(unsafe.Pointer(runtimePCLineTable), i*size)) } +func funcInfoStubIndexAt(i uintptr) uint32 { + size := unsafe.Sizeof(*runtimeFuncInfoStubIndexes) + return *(*uint32)(unsafe.Add(unsafe.Pointer(runtimeFuncInfoStubIndexes), i*size)) +} + func funcInfoHashString(s string) uintptr { const ( offset = uint32(2166136261) @@ -435,12 +448,25 @@ func funcInfoForSymbol(symbol string) *runtimeFuncInfoRecord { return nil } +func funcInfoForRuntimeSymbol(symbol string) *runtimeFuncInfoRecord { + if rec := funcInfoForSymbol(symbol); rec != nil { + return rec + } + if hasStringPrefix(symbol, runtimeClosureStubPrefix) { + return funcInfoForSymbol(symbol[len(runtimeClosureStubPrefix):]) + } + if hasStringPrefix(symbol, runtimePublicClosureStubPrefix) { + return funcInfoForSymbol(symbol[len(runtimePublicClosureStubPrefix):]) + } + return nil +} + func applyFuncInfo(sym *pcSymbol, rawFunction string) { - rec := funcInfoForSymbol(rawFunction) + rec := funcInfoForRuntimeSymbol(rawFunction) if rec == nil { public := publicFunctionName(rawFunction) if public != rawFunction { - rec = funcInfoForSymbol(public) + rec = funcInfoForRuntimeSymbol(public) } } if rec == nil { @@ -568,6 +594,37 @@ func initRuntimeFuncPCFramesOnce() { entries[index] = pc } } + // Closure stubs are an ABI adapter and may go away in a future closure + // lowering. Keep the compatibility table light: it stores only target + // funcinfo record indexes, and live stub PCs are resolved lazily here. 
+ if runtimeFuncInfoStubIndexes != nil && runtimeFuncInfoStubCount != 0 && runtimeFuncInfoStubCount <= runtimeFuncInfoCount { + for i := uintptr(0); i < runtimeFuncInfoStubCount; i++ { + index := funcInfoStubIndexAt(i) + if index == 0 || uintptr(index) > runtimeFuncInfoCount { + continue + } + fn := funcInfoAt(uintptr(index) - 1) + symbol := funcInfoJoinName(fn.symbolPkg, fn.symbolName) + if symbol == "" { + continue + } + pc := symbolPC(runtimeClosureStubPrefix + symbol) + if pc == 0 { + continue + } + function := publicFunctionName(funcInfoJoinName(fn.namePkg, fn.nameName)) + if function == "" { + function = publicFunctionName(symbol) + } + frames = append(frames, runtimeFuncPCFrame{ + entry: pc, + funcIndex: index, + function: function, + file: funcInfoJoinFile(fn.fileRoot, fn.fileName), + startLine: int(fn.line), + }) + } + } sortRuntimeFuncPCFrames(frames) frames = uniqueRuntimeFuncPCFrames(frames) runtimeFuncPCFrames = frames diff --git a/test/go/runtime_lineinfo_stack_test.go b/test/go/runtime_lineinfo_stack_test.go index a9e95bdbc3..52fffcabef 100644 --- a/test/go/runtime_lineinfo_stack_test.go +++ b/test/go/runtime_lineinfo_stack_test.go @@ -28,6 +28,7 @@ import ( const runtimeLineInfoProbe = `package main import ( + "reflect" "strconv" "runtime" "runtime/debug" @@ -38,8 +39,9 @@ import ( func main() { checkCaller() checkCallerSkip() - checkFrames() + checkFrames() // FRAMES_MAIN_MARK checkFuncForPC() + checkFuncForPCFunctionValue() checkFuncInfoRename() checkRuntimeStack() checkPanicStack() @@ -69,14 +71,25 @@ func helperCallerSkip() { //go:noinline func checkFrames() { var pcs [8]uintptr - n := runtime.Callers(0, pcs[:]) + n := runtime.Callers(0, pcs[:]) // FRAMES_CHECK_MARK frames := runtime.CallersFrames(pcs[:n]) + seenCheckFrames := false + seenMain := false for { frame, more := frames.Next() if frame.Function == "main.checkFrames" { - if !strings.HasSuffix(frame.File, "main.go") || frame.Line == 0 { - panic("bad frame") + if 
!strings.HasSuffix(frame.File, "main.go") || frame.Line != FRAMES_CHECK_LINE { + panic("bad checkFrames frame: " + frame.File + ":" + strconv.Itoa(frame.Line)) + } + seenCheckFrames = true + } + if frame.Function == "main.main" { + if !strings.HasSuffix(frame.File, "main.go") || frame.Line != FRAMES_MAIN_LINE { + panic("bad main frame: " + frame.File + ":" + strconv.Itoa(frame.Line)) } + seenMain = true + } + if seenCheckFrames && seenMain { return } if !more { @@ -108,6 +121,36 @@ func checkFuncForPC() { } } +//go:noinline +func entryPCTarget() int { + return 7 // FUNC_ENTRY_TARGET_MARK +} + +//go:noinline +func checkFuncForPCFunctionValue() { + if entryPCTarget() != 7 { + panic("bad target") + } + pc := reflect.ValueOf(entryPCTarget).Pointer() + if pc == 0 { + panic("missing function value pc") + } + fn := runtime.FuncForPC(pc) + if fn == nil { + panic("missing function value func") + } + if name := fn.Name(); name != "main.entryPCTarget" { + panic("bad function value func: " + name) + } + if entry := fn.Entry(); entry == 0 { + panic("missing function value entry") + } + file, line := fn.FileLine(pc) + if !strings.HasSuffix(file, "main.go") || line != FUNC_ENTRY_TARGET_LINE { + panic("bad function value fileline: " + file + ":" + strconv.Itoa(line)) + } +} + //go:noinline func checkFuncInfoRename() { pc := renamedPC() @@ -156,7 +199,10 @@ func TestRuntimeLineInfoAndStack(t *testing.T) { source := runtimeLineInfoProbe source = strings.ReplaceAll(source, "CALLER_LINE", strconv.Itoa(markerLine(source, "CALLER_MARK"))) source = strings.ReplaceAll(source, "CALLER_SKIP_LINE", strconv.Itoa(markerLine(source, "CALLER_SKIP_MARK"))) + source = strings.ReplaceAll(source, "FRAMES_MAIN_LINE", strconv.Itoa(markerLine(source, "FRAMES_MAIN_MARK"))) + source = strings.ReplaceAll(source, "FRAMES_CHECK_LINE", strconv.Itoa(markerLine(source, "FRAMES_CHECK_MARK"))) source = strings.ReplaceAll(source, "FUNC_FILELINE_LINE", strconv.Itoa(markerLine(source, "FUNC_FILELINE_MARK"))) + 
source = strings.ReplaceAll(source, "FUNC_ENTRY_TARGET_LINE", strconv.Itoa(markerLine(source, "FUNC_ENTRY_TARGET_MARK"))) source = strings.ReplaceAll(source, "RUNTIME_STACK_LINE", strconv.Itoa(markerLine(source, "RUNTIME_STACK_MARK"))) source = strings.ReplaceAll(source, "DEBUG_STACK_LINE", strconv.Itoa(markerLine(source, "DEBUG_STACK_CALL_MARK"))) From 8eec8e0040e064f48913bba792d9c23641171742 Mon Sep 17 00:00:00 2001 From: Li Jie Date: Wed, 1 Jul 2026 12:50:07 +0800 Subject: [PATCH 14/59] runtime: publish funcinfo records for live stubs --- internal/build/build.go | 3 +- internal/build/funcinfo_table.go | 160 +++++++++++++++++++++++-- internal/build/funcinfo_table_test.go | 37 ++++-- internal/build/main_module.go | 2 +- runtime/internal/lib/runtime/symtab.go | 103 +++++++++++++++- 5 files changed, 279 insertions(+), 26 deletions(-) diff --git a/internal/build/build.go b/internal/build/build.go index e6c2501361..7d348651e5 100644 --- a/internal/build/build.go +++ b/internal/build/build.go @@ -1046,7 +1046,7 @@ func linkMainPkg(ctx *context, pkg *packages.Package, pkgs []*aPackage, outputPa // Use a stable synthetic name to avoid confusing it with the real main package in traces/logs. 
funcInfo := prepareFuncInfoTableRecords(collectFuncInfo(linkedOrder), nil) pcLineInfo := collectPCLineInfo(linkedOrder) - funcInfoStubs := collectFuncInfoStubIndexes(linkedOrder, funcInfo) + funcInfoStubs := collectFuncInfoStubRecords(linkedOrder, funcInfo) entryPkg := genMainModule(ctx, llssa.PkgRuntime, pkg, &genConfig{ rtInit: needRuntime, pyInit: needPyInit, @@ -1345,6 +1345,7 @@ func buildPkg(ctx *context, aPkg *aPackage, verbose bool) error { return fmt.Errorf("run LLVM passes failed for %v: %v", pkgPath, err) } } + emitFuncInfoStubSites(ctx, ret) printCmds := ctx.shouldPrintCommands(verbose) cgoLLFiles, cgoLdflags, err := buildCgo(ctx, aPkg, aPkg.Package.Syntax, externs, printCmds) diff --git a/internal/build/funcinfo_table.go b/internal/build/funcinfo_table.go index 64c2b12385..36e3ab9b1f 100644 --- a/internal/build/funcinfo_table.go +++ b/internal/build/funcinfo_table.go @@ -36,10 +36,14 @@ const ( funcInfoHashMaskSymbol = "__llgo_funcinfo_hash_mask" funcInfoStubIndexesSymbol = "__llgo_funcinfo_stub_indexes" funcInfoStubCountSymbol = "__llgo_funcinfo_stub_count" + funcInfoStubSiteStartPtrSymbol = "__llgo_funcinfo_stubsite_start" + funcInfoStubSiteEndPtrSymbol = "__llgo_funcinfo_stubsite_end" pcLineTableSymbol = "__llgo_pcline_table" pcLineCountSymbol = "__llgo_pcline_count" pcSiteStartPtrSymbol = "__llgo_pcsite_start" pcSiteEndPtrSymbol = "__llgo_pcsite_end" + funcInfoStubSiteStartSymbol = "__start_llgo_funcinfo_stubsite" + funcInfoStubSiteEndSymbol = "__stop_llgo_funcinfo_stubsite" pcSiteStartSymbol = "__start_llgo_pcline" pcSiteEndSymbol = "__stop_llgo_pcline" funcInfoDataSymbol = "__llgo_funcinfo_table$data" @@ -67,6 +71,11 @@ type pcLineRecord struct { column uint32 } +type funcInfoStubRecord struct { + symbol string + funcIndex uint32 +} + func collectFuncInfo(pkgs []Package) []funcInfoRecord { seen := make(map[string]funcInfoRecord) for _, pkg := range pkgs { @@ -125,7 +134,7 @@ func collectPCLineInfo(pkgs []Package) []pcLineRecord { return out } 
-func collectFuncInfoStubIndexes(pkgs []Package, records []funcInfoRecord) []uint32 { +func collectFuncInfoStubRecords(pkgs []Package, records []funcInfoRecord) []funcInfoStubRecord { if len(records) == 0 { return nil } @@ -135,17 +144,21 @@ func collectFuncInfoStubIndexes(pkgs []Package, records []funcInfoRecord) []uint recordBySymbol[rec.symbol] = uint32(i + 1) } } - seen := make(map[uint32]none) + seen := make(map[string]funcInfoStubRecord) for _, pkg := range pkgs { if pkg == nil || pkg.LPkg == nil { continue } fn := pkg.LPkg.Module().FirstFunction() for !fn.IsNil() { + if fn.IsDeclaration() || fn.BasicBlocksCount() == 0 { + fn = llvm.NextFunction(fn) + continue + } name := fn.Name() if target, ok := strings.CutPrefix(name, closureStubPrefix); ok { if idx := recordBySymbol[target]; idx != 0 { - seen[idx] = none{} + seen[name] = funcInfoStubRecord{symbol: name, funcIndex: idx} } } fn = llvm.NextFunction(fn) @@ -154,12 +167,12 @@ func collectFuncInfoStubIndexes(pkgs []Package, records []funcInfoRecord) []uint if len(seen) == 0 { return nil } - out := make([]uint32, 0, len(seen)) - for idx := range seen { - out = append(out, idx) + out := make([]funcInfoStubRecord, 0, len(seen)) + for _, rec := range seen { + out = append(out, rec) } sort.Slice(out, func(i, j int) bool { - return out[i] < out[j] + return out[i].symbol < out[j].symbol }) return out } @@ -238,7 +251,7 @@ func readPCLineInfo(mod llvm.Module) []pcLineRecord { return out } -func emitFuncInfoTable(ctx *context, pkg llssa.Package, records []funcInfoRecord, pcLines []pcLineRecord, stubIndexes []uint32) { +func emitFuncInfoTable(ctx *context, pkg llssa.Package, records []funcInfoRecord, pcLines []pcLineRecord, stubRecords []funcInfoStubRecord) { mod := pkg.Module() llvmCtx := mod.Context() i8Type := llvmCtx.Int8Type() @@ -261,6 +274,10 @@ func emitFuncInfoTable(ctx *context, pkg llssa.Package, records []funcInfoRecord i32Type, i32Type, }, false) + stubSiteRecordType := llvmCtx.StructType([]llvm.Type{ + 
llvm.PointerType(i8Type, 0), + i64Type, + }, false) pcSiteRecordType := llvmCtx.StructType([]llvm.Type{ llvm.PointerType(i8Type, 0), i64Type, @@ -270,6 +287,8 @@ func emitFuncInfoTable(ctx *context, pkg llssa.Package, records []funcInfoRecord pcLinePtr := llvm.AddGlobal(mod, llvm.PointerType(pcLineRecordType, 0), pcLineTableSymbol) pcSiteStartPtr := llvm.AddGlobal(mod, llvm.PointerType(pcSiteRecordType, 0), pcSiteStartPtrSymbol) pcSiteEndPtr := llvm.AddGlobal(mod, llvm.PointerType(pcSiteRecordType, 0), pcSiteEndPtrSymbol) + stubSiteStartPtr := llvm.AddGlobal(mod, llvm.PointerType(stubSiteRecordType, 0), funcInfoStubSiteStartPtrSymbol) + stubSiteEndPtr := llvm.AddGlobal(mod, llvm.PointerType(stubSiteRecordType, 0), funcInfoStubSiteEndPtrSymbol) stringsPtr := llvm.AddGlobal(mod, llvm.PointerType(i8Type, 0), funcInfoStringsSymbol) stringOffsetsPtr := llvm.AddGlobal(mod, llvm.PointerType(i32Type, 0), funcInfoStringOffsetsSymbol) stringCount := llvm.AddGlobal(mod, countType, funcInfoStringCountSymbol) @@ -284,6 +303,8 @@ func emitFuncInfoTable(ctx *context, pkg llssa.Package, records []funcInfoRecord pcLinePtr.SetInitializer(llvm.ConstPointerNull(pcLinePtr.GlobalValueType())) pcSiteStartPtr.SetInitializer(llvm.ConstPointerNull(pcSiteStartPtr.GlobalValueType())) pcSiteEndPtr.SetInitializer(llvm.ConstPointerNull(pcSiteEndPtr.GlobalValueType())) + stubSiteStartPtr.SetInitializer(llvm.ConstPointerNull(stubSiteStartPtr.GlobalValueType())) + stubSiteEndPtr.SetInitializer(llvm.ConstPointerNull(stubSiteEndPtr.GlobalValueType())) stringsPtr.SetInitializer(llvm.ConstPointerNull(stringsPtr.GlobalValueType())) stringOffsetsPtr.SetInitializer(llvm.ConstPointerNull(stringOffsetsPtr.GlobalValueType())) stringCount.SetInitializer(llvm.ConstInt(countType, 0, false)) @@ -305,6 +326,8 @@ func emitFuncInfoTable(ctx *context, pkg llssa.Package, records []funcInfoRecord pcLinePtr.SetInitializer(llvm.ConstPointerNull(pcLinePtr.GlobalValueType())) 
pcSiteStartPtr.SetInitializer(llvm.ConstPointerNull(pcSiteStartPtr.GlobalValueType())) pcSiteEndPtr.SetInitializer(llvm.ConstPointerNull(pcSiteEndPtr.GlobalValueType())) + stubSiteStartPtr.SetInitializer(llvm.ConstPointerNull(stubSiteStartPtr.GlobalValueType())) + stubSiteEndPtr.SetInitializer(llvm.ConstPointerNull(stubSiteEndPtr.GlobalValueType())) stringsPtr.SetInitializer(llvm.ConstPointerNull(stringsPtr.GlobalValueType())) stringOffsetsPtr.SetInitializer(llvm.ConstPointerNull(stringOffsetsPtr.GlobalValueType())) stringCount.SetInitializer(llvm.ConstInt(countType, 0, false)) @@ -374,7 +397,17 @@ func emitFuncInfoTable(ctx *context, pkg llssa.Package, records []funcInfoRecord pcSiteEndPtr.SetInitializer(llvm.ConstPointerNull(pcSiteEndPtr.GlobalValueType())) } } - emitRuntimeFuncInfoSentinels(mod, ctx.prog.PointerSize(), shouldEmitRuntimeELFSites(ctx) && len(pcLineValues) != 0) + emitELFSites := shouldEmitRuntimeELFSites(ctx) + emitRuntimeFuncInfoELFSites(mod, ctx.prog.PointerSize(), emitELFSites && len(pcLineValues) != 0, emitELFSites && len(stubRecords) != 0) + if emitELFSites && len(stubRecords) != 0 { + stubSiteStart := llvm.AddGlobal(mod, stubSiteRecordType, funcInfoStubSiteStartSymbol) + stubSiteEnd := llvm.AddGlobal(mod, stubSiteRecordType, funcInfoStubSiteEndSymbol) + stubSiteStartPtr.SetInitializer(stubSiteStart) + stubSiteEndPtr.SetInitializer(stubSiteEnd) + } else { + stubSiteStartPtr.SetInitializer(llvm.ConstPointerNull(stubSiteStartPtr.GlobalValueType())) + stubSiteEndPtr.SetInitializer(llvm.ConstPointerNull(stubSiteEndPtr.GlobalValueType())) + } stringArrayType := llvm.ArrayType(i8Type, len(encoded.Strings)) stringData := llvm.AddGlobal(mod, stringArrayType, funcInfoStringsDataSymbol) @@ -431,11 +464,17 @@ func emitFuncInfoTable(ctx *context, pkg llssa.Package, records []funcInfoRecord hashMask.SetInitializer(llvm.ConstInt(countType, uint64(len(encoded.Hash)-1), false)) } count.SetInitializer(llvm.ConstInt(countType, uint64(len(encoded.Records)), 
false)) - stubIndexValues := make([]llvm.Value, 0, len(stubIndexes)) - for _, idx := range stubIndexes { + stubIndexSeen := make(map[uint32]none, len(stubRecords)) + stubIndexValues := make([]llvm.Value, 0, len(stubRecords)) + for _, stub := range stubRecords { + idx := stub.funcIndex if idx == 0 || int(idx) > len(encoded.Records) { continue } + if _, ok := stubIndexSeen[idx]; ok { + continue + } + stubIndexSeen[idx] = none{} stubIndexValues = append(stubIndexValues, llvm.ConstInt(i32Type, uint64(idx), false)) } if len(stubIndexValues) == 0 { @@ -464,8 +503,80 @@ func shouldEmitRuntimeELFSites(ctx *context) bool { ctx.buildConf.Target == "" } -func emitRuntimeFuncInfoSentinels(mod llvm.Module, pointerSize int, pcSite bool) { - if !pcSite { +func emitFuncInfoStubSites(ctx *context, pkg llssa.Package) { + if !shouldEmitRuntimeELFSites(ctx) || pkg == nil || !ctx.prog.FuncInfoMetadataEnabled() { + return + } + mod := pkg.Module() + llvmCtx := mod.Context() + builder := llvmCtx.NewBuilder() + defer builder.Dispose() + asmType := llvm.FunctionType(llvmCtx.VoidType(), nil, false) + ptrDirective := ".quad" + align := "3" + if ctx.prog.PointerSize() == 4 { + ptrDirective = ".long" + align = "2" + } + for fn := mod.FirstFunction(); !fn.IsNil(); fn = llvm.NextFunction(fn) { + if fn.IsDeclaration() || fn.BasicBlocksCount() == 0 { + continue + } + symbol := fn.Name() + target, ok := strings.CutPrefix(symbol, closureStubPrefix) + if !ok || target == "" { + continue + } + entry := fn.EntryBasicBlock() + if entry.IsNil() { + continue + } + first := entry.FirstInstruction() + if first.IsNil() { + builder.SetInsertPointAtEnd(entry) + } else { + builder.SetInsertPointBefore(first) + } + instruction := ".pushsection llgo_funcinfo_stubsite,\"ao\",@progbits," + asmQuoteELFSymbol(symbol) + "\n" + + ".p2align " + align + "\n" + + ptrDirective + " " + asmQuoteELFSymbol(symbol) + "\n" + + ".quad " + uint64Hex(funcInfoSymbolID(target)) + "\n" + + ".popsection" + asm := 
llvm.InlineAsm(asmType, instruction, "", true, false, llvm.InlineAsmDialectATT, false) + builder.CreateCall(asmType, asm, nil, "") + } +} + +func funcInfoSymbolID(symbol string) uint64 { + const ( + offset = uint64(14695981039346656037) + prime = uint64(1099511628211) + ) + h := offset + for i := 0; i < len(symbol); i++ { + h ^= uint64(symbol[i]) + h *= prime + } + if h == 0 { + return 1 + } + return h +} + +func uint64Hex(v uint64) string { + const hexdigits = "0123456789abcdef" + var buf [18]byte + buf[0] = '0' + buf[1] = 'x' + for i := len(buf) - 1; i >= 2; i-- { + buf[i] = hexdigits[v&0xf] + v >>= 4 + } + return string(buf[:]) +} + +func emitRuntimeFuncInfoELFSites(mod llvm.Module, pointerSize int, pcSite bool, stubSite bool) { + if !pcSite && !stubSite { return } ptrDirective := ".quad" @@ -481,9 +592,32 @@ func emitRuntimeFuncInfoSentinels(mod llvm.Module, pointerSize int, pcSite bool) asm.WriteString(ptrDirective + " 0\n") asm.WriteString(".quad 0\n") } + if stubSite { + asm.WriteString(".section llgo_funcinfo_stubsite,\"aR\",@progbits\n") + asm.WriteString(".p2align " + align + "\n") + asm.WriteString(ptrDirective + " 0\n") + asm.WriteString(".quad 0\n") + } mod.SetInlineAsm(asm.String()) } +func asmQuoteELFSymbol(symbol string) string { + var b strings.Builder + b.Grow(len(symbol) + 2) + b.WriteByte('"') + for i := 0; i < len(symbol); i++ { + switch symbol[i] { + case '\\', '"': + b.WriteByte('\\') + case '$': + b.WriteByte('$') + } + b.WriteByte(symbol[i]) + } + b.WriteByte('"') + return b.String() +} + func toFuncInfoRecords(records []funcInfoRecord) []buildfuncinfo.Record { out := make([]buildfuncinfo.Record, len(records)) for i, rec := range records { diff --git a/internal/build/funcinfo_table_test.go b/internal/build/funcinfo_table_test.go index 1a58825e51..31fc6838bc 100644 --- a/internal/build/funcinfo_table_test.go +++ b/internal/build/funcinfo_table_test.go @@ -90,14 +90,8 @@ func TestFuncInfoTableMaterializesClosureStubIndexes(t *testing.T) { src 
:= prog.NewPackage("example.com/p", "example.com/p") src.EmitFuncInfo("example.com/p.live", "example.com/p.Live", "live.go", 17, 3) src.EmitFuncInfo("example.com/p.other", "example.com/p.Other", "other.go", 23, 1) - src.NewFunc(closureStubPrefix+"example.com/p.live", llssa.NoArgsNoRet, llssa.InC) - - records := collectFuncInfo([]Package{{LPkg: src}}) - stubs := collectFuncInfoStubIndexes([]Package{{LPkg: src}}, records) - if len(stubs) != 1 || records[stubs[0]-1].symbol != "example.com/p.live" { - t.Fatalf("stub indexes = %+v for records %+v, want live", stubs, records) - } - + stubFn := src.NewFunc(closureStubPrefix+"example.com/p.live", llssa.NoArgsNoRet, llssa.InC) + stubFn.MakeBody(1).Return() ctx := &context{ prog: prog, buildConf: &Config{ @@ -106,6 +100,27 @@ func TestFuncInfoTableMaterializesClosureStubIndexes(t *testing.T) { Goarch: "amd64", }, } + prog.EnableFuncInfoMetadata(true) + emitFuncInfoStubSites(ctx, src) + srcIR := src.String() + for _, want := range []string{ + "call void asm sideeffect", + ".pushsection llgo_funcinfo_stubsite", + `.quad \22__llgo_stub.example.com/p.live\22`, + ".quad 0x", + } { + if !strings.Contains(srcIR, want) { + t.Fatalf("package stub site IR missing %q:\n%s", want, srcIR) + } + } + + records := collectFuncInfo([]Package{{LPkg: src}}) + stubs := collectFuncInfoStubRecords([]Package{{LPkg: src}}, records) + if len(stubs) != 1 || records[stubs[0].funcIndex-1].symbol != "example.com/p.live" || + stubs[0].symbol != closureStubPrefix+"example.com/p.live" { + t.Fatalf("stub indexes = %+v for records %+v, want live", stubs, records) + } + entry := genMainModule(ctx, llssa.PkgRuntime, &packages.Package{ PkgPath: "example.com/main", ExportFile: "main.a", @@ -114,14 +129,18 @@ func TestFuncInfoTableMaterializesClosureStubIndexes(t *testing.T) { for _, want := range []string{ "@__llgo_funcinfo_stub_indexes = global ptr", "@__llgo_funcinfo_stub_count = global i64 1", + "@__llgo_funcinfo_stubsite_start = global ptr 
@__start_llgo_funcinfo_stubsite", + "@__llgo_funcinfo_stubsite_end = global ptr @__stop_llgo_funcinfo_stubsite", `@"__llgo_funcinfo_stub_indexes$data" = private unnamed_addr constant [1 x i32]`, "@__llgo_funcinfo_count = global i64 2", + "module asm \".section llgo_funcinfo_stubsite", + ".quad 0", } { if !strings.Contains(ir, want) { t.Fatalf("funcinfo stub index table IR missing %q:\n%s", want, ir) } } - if strings.Contains(ir, closureStubPrefix) { + if strings.Contains(ir, closureStubPrefix+"example.com/p.live\\00") { t.Fatalf("stub index table should not add stub symbol strings:\n%s", ir) } } diff --git a/internal/build/main_module.go b/internal/build/main_module.go index 6992bcec73..67378f6e2e 100644 --- a/internal/build/main_module.go +++ b/internal/build/main_module.go @@ -45,7 +45,7 @@ type genConfig struct { abiSymbols map[string]none funcInfo []funcInfoRecord pcLineInfo []pcLineRecord - funcInfoStubs []uint32 + funcInfoStubs []funcInfoStubRecord } // genMainModule generates the main entry module for an llgo program. 
diff --git a/runtime/internal/lib/runtime/symtab.go b/runtime/internal/lib/runtime/symtab.go index 5dac41d161..0d425ecf0a 100644 --- a/runtime/internal/lib/runtime/symtab.go +++ b/runtime/internal/lib/runtime/symtab.go @@ -172,6 +172,17 @@ var runtimeFuncInfoStubIndexes *uint32 //go:linkname runtimeFuncInfoStubCount __llgo_funcinfo_stub_count var runtimeFuncInfoStubCount uintptr +type runtimeFuncInfoStubSiteRecord struct { + pc uintptr + symbolID uint64 +} + +//go:linkname runtimeFuncInfoStubSiteStart __llgo_funcinfo_stubsite_start +var runtimeFuncInfoStubSiteStart *runtimeFuncInfoStubSiteRecord + +//go:linkname runtimeFuncInfoStubSiteEnd __llgo_funcinfo_stubsite_end +var runtimeFuncInfoStubSiteEnd *runtimeFuncInfoStubSiteRecord + type runtimePCLineRecord struct { id uint64 funcIndex uint32 @@ -594,9 +605,11 @@ func initRuntimeFuncPCFramesOnce() { entries[index] = pc } } + frames = appendRuntimeFuncInfoStubSiteFrames(frames) // Closure stubs are an ABI adapter and may go away in a future closure - // lowering. Keep the compatibility table light: it stores only target - // funcinfo record indexes, and live stub PCs are resolved lazily here. + // lowering. Keep the fallback compatibility table light: it stores only + // target funcinfo record indexes. On ELF we prefer the associated stub-site + // section above because linkers do not expose local stubs through dlsym. 
if runtimeFuncInfoStubIndexes != nil && runtimeFuncInfoStubCount != 0 && runtimeFuncInfoStubCount <= runtimeFuncInfoCount { for i := uintptr(0); i < runtimeFuncInfoStubCount; i++ { index := funcInfoStubIndexAt(i) @@ -632,6 +645,92 @@ func initRuntimeFuncPCFramesOnce() { runtimeFuncPCIndex = buildRuntimeFuncPCIndex(frames) } +func appendRuntimeFuncInfoStubSiteFrames(frames []runtimeFuncPCFrame) []runtimeFuncPCFrame { + if runtimeFuncInfoStubSiteStart == nil || runtimeFuncInfoStubSiteEnd == nil { + return frames + } + start := uintptr(unsafe.Pointer(runtimeFuncInfoStubSiteStart)) + end := uintptr(unsafe.Pointer(runtimeFuncInfoStubSiteEnd)) + size := unsafe.Sizeof(*runtimeFuncInfoStubSiteStart) + if end <= start || size == 0 || (end-start)%size != 0 { + return frames + } + nsite := (end - start) / size + if nsite > runtimeFuncInfoCount*16 || nsite > 1<<20 { + return frames + } + for i := uintptr(0); i < nsite; i++ { + site := (*runtimeFuncInfoStubSiteRecord)(unsafe.Pointer(start + i*size)) + if site == nil || site.pc == 0 || site.symbolID == 0 { + continue + } + funcIndex := funcInfoIndexForSymbolID(site.symbolID) + if funcIndex == 0 || uintptr(funcIndex) > runtimeFuncInfoCount { + continue + } + fn := funcInfoAt(uintptr(funcIndex) - 1) + symbol := funcInfoJoinName(fn.symbolPkg, fn.symbolName) + function := publicFunctionName(funcInfoJoinName(fn.namePkg, fn.nameName)) + if function == "" { + function = publicFunctionName(symbol) + } + frames = append(frames, runtimeFuncPCFrame{ + entry: site.pc, + funcIndex: funcIndex, + function: function, + file: funcInfoJoinFile(fn.fileRoot, fn.fileName), + startLine: int(fn.line), + }) + } + return frames +} + +func funcInfoIndexForSymbolID(id uint64) uint32 { + if id == 0 || runtimeFuncInfoTable == nil || runtimeFuncInfoCount == 0 { + return 0 + } + for i := uintptr(0); i < runtimeFuncInfoCount; i++ { + rec := funcInfoAt(i) + if funcInfoSymbolIDFromRecord(rec) == id { + return uint32(i + 1) + } + } + return 0 +} + +func 
funcInfoSymbolIDFromRecord(rec *runtimeFuncInfoRecord) uint64 { + const ( + offset = uint64(14695981039346656037) + prime = uint64(1099511628211) + ) + if rec == nil { + return 0 + } + h := offset + h = funcInfoHashCString(h, funcInfoCString(rec.symbolPkg)) + pkgLen := cStringLen(funcInfoCString(rec.symbolPkg)) + name := funcInfoCString(rec.symbolName) + if pkgLen != 0 && cStringLen(name) != 0 { + h ^= uint64('.') + h *= prime + } + h = funcInfoHashCString(h, name) + if h == 0 { + return 1 + } + return h +} + +func funcInfoHashCString(h uint64, s *c.Char) uint64 { + const prime = uint64(1099511628211) + for s != nil && *s != 0 { + h ^= uint64(byte(*s)) + h *= prime + s = (*c.Char)(unsafe.Add(unsafe.Pointer(s), 1)) + } + return h +} + func sortRuntimeFuncPCFrames(frames []runtimeFuncPCFrame) { if len(frames) < 2 { return From 01913f42c4f14881007b0d955f1ec41b1a89ba17 Mon Sep 17 00:00:00 2001 From: Li Jie Date: Wed, 1 Jul 2026 13:12:00 +0800 Subject: [PATCH 15/59] runtime: skip ELF stub-site records during LTO --- internal/build/funcinfo_table.go | 11 ++++++++--- internal/build/funcinfo_table_test.go | 25 +++++++++++++++++++++++++ 2 files changed, 33 insertions(+), 3 deletions(-) diff --git a/internal/build/funcinfo_table.go b/internal/build/funcinfo_table.go index 36e3ab9b1f..f7ffc57900 100644 --- a/internal/build/funcinfo_table.go +++ b/internal/build/funcinfo_table.go @@ -398,8 +398,9 @@ func emitFuncInfoTable(ctx *context, pkg llssa.Package, records []funcInfoRecord } } emitELFSites := shouldEmitRuntimeELFSites(ctx) - emitRuntimeFuncInfoELFSites(mod, ctx.prog.PointerSize(), emitELFSites && len(pcLineValues) != 0, emitELFSites && len(stubRecords) != 0) - if emitELFSites && len(stubRecords) != 0 { + emitStubSites := shouldEmitRuntimeStubELFSites(ctx) + emitRuntimeFuncInfoELFSites(mod, ctx.prog.PointerSize(), emitELFSites && len(pcLineValues) != 0, emitStubSites && len(stubRecords) != 0) + if emitStubSites && len(stubRecords) != 0 { stubSiteStart := 
llvm.AddGlobal(mod, stubSiteRecordType, funcInfoStubSiteStartSymbol) stubSiteEnd := llvm.AddGlobal(mod, stubSiteRecordType, funcInfoStubSiteEndSymbol) stubSiteStartPtr.SetInitializer(stubSiteStart) @@ -503,8 +504,12 @@ func shouldEmitRuntimeELFSites(ctx *context) bool { ctx.buildConf.Target == "" } +func shouldEmitRuntimeStubELFSites(ctx *context) bool { + return shouldEmitRuntimeELFSites(ctx) && !ctx.buildConf.ltoEnabled() +} + func emitFuncInfoStubSites(ctx *context, pkg llssa.Package) { - if !shouldEmitRuntimeELFSites(ctx) || pkg == nil || !ctx.prog.FuncInfoMetadataEnabled() { + if !shouldEmitRuntimeStubELFSites(ctx) || pkg == nil || !ctx.prog.FuncInfoMetadataEnabled() { return } mod := pkg.Module() diff --git a/internal/build/funcinfo_table_test.go b/internal/build/funcinfo_table_test.go index 31fc6838bc..9ed6739cad 100644 --- a/internal/build/funcinfo_table_test.go +++ b/internal/build/funcinfo_table_test.go @@ -22,6 +22,7 @@ import ( "github.com/xgo-dev/llvm" + "github.com/goplus/llgo/internal/lto" "github.com/goplus/llgo/internal/packages" llssa "github.com/goplus/llgo/ssa" ) @@ -143,6 +144,30 @@ func TestFuncInfoTableMaterializesClosureStubIndexes(t *testing.T) { if strings.Contains(ir, closureStubPrefix+"example.com/p.live\\00") { t.Fatalf("stub index table should not add stub symbol strings:\n%s", ir) } + + ltoCtx := &context{ + prog: prog, + buildConf: &Config{ + BuildMode: BuildModeExe, + Goos: "linux", + Goarch: "amd64", + LTO: lto.Full, + }, + } + ltoEntry := genMainModule(ltoCtx, llssa.PkgRuntime, &packages.Package{ + PkgPath: "example.com/main", + ExportFile: "main.a", + }, &genConfig{funcInfo: records, funcInfoStubs: stubs}) + ltoIR := ltoEntry.LPkg.String() + for _, bad := range []string{ + "@__llgo_funcinfo_stubsite_start = global ptr @__start_llgo_funcinfo_stubsite", + "@__llgo_funcinfo_stubsite_end = global ptr @__stop_llgo_funcinfo_stubsite", + "module asm \".section llgo_funcinfo_stubsite", + } { + if strings.Contains(ltoIR, bad) { + 
t.Fatalf("full LTO funcinfo table should not emit stub site %q:\n%s", bad, ltoIR) + } + } } func TestFuncInfoTableMaterializesPCLineMetadata(t *testing.T) { From 6fa875bd9f1b946a22ed5ee7301125fc112f3d23 Mon Sep 17 00:00:00 2001 From: Li Jie Date: Wed, 1 Jul 2026 14:40:21 +0800 Subject: [PATCH 16/59] runtime: reduce funcinfo lookup initialization cost --- internal/build/build.go | 3 + internal/build/build_test.go | 20 +++ .../lib/runtime/pprof_runtime_stub_llgo.go | 7 + runtime/internal/lib/runtime/symtab.go | 127 ++++++++++++------ 4 files changed, 119 insertions(+), 38 deletions(-) diff --git a/internal/build/build.go b/internal/build/build.go index 7d348651e5..97c68389dc 100644 --- a/internal/build/build.go +++ b/internal/build/build.go @@ -1107,6 +1107,9 @@ func linkedModuleGlobals(pkgs []Package) map[string]none { continue } for g := pkg.LPkg.Module().FirstGlobal(); !g.IsNil(); g = gllvm.NextGlobal(g) { + if g.IsDeclaration() { + continue + } seen[g.Name()] = none{} } } diff --git a/internal/build/build_test.go b/internal/build/build_test.go index bc6f89d785..c8ce08e9e6 100644 --- a/internal/build/build_test.go +++ b/internal/build/build_test.go @@ -22,6 +22,7 @@ import ( "github.com/goplus/llgo/internal/mockable" "github.com/goplus/llgo/internal/packages" llssa "github.com/goplus/llgo/ssa" + "github.com/xgo-dev/llvm" ) func TestMain(m *testing.M) { @@ -98,6 +99,25 @@ func TestIsFuncInfoEnabled(t *testing.T) { } } +func TestLinkedModuleGlobalsSkipsDeclarations(t *testing.T) { + prog := llssa.NewProgram(nil) + lpkg := prog.NewPackage("example.com/p", "example.com/p") + mod := lpkg.Module() + i32 := mod.Context().Int32Type() + + defined := llvm.AddGlobal(mod, i32, "example.com/p.defined") + defined.SetInitializer(llvm.ConstInt(i32, 1, false)) + llvm.AddGlobal(mod, i32, "example.com/p.declared") + + got := linkedModuleGlobals([]Package{{LPkg: lpkg}}) + if _, ok := got["example.com/p.defined"]; !ok { + t.Fatalf("linkedModuleGlobals missing defined global: %#v", 
got) + } + if _, ok := got["example.com/p.declared"]; ok { + t.Fatalf("linkedModuleGlobals should skip external declarations: %#v", got) + } +} + func mockRun(args []string, cfg *Config) { defer mockable.DisableMock() mockable.EnableMock() diff --git a/runtime/internal/lib/runtime/pprof_runtime_stub_llgo.go b/runtime/internal/lib/runtime/pprof_runtime_stub_llgo.go index ec0b91983e..a0bae4d160 100644 --- a/runtime/internal/lib/runtime/pprof_runtime_stub_llgo.go +++ b/runtime/internal/lib/runtime/pprof_runtime_stub_llgo.go @@ -114,6 +114,13 @@ func FuncForPC(pc uintptr) *Func { } func funcForPCSlow(pc uintptr) *Func { + if pc&3 != 0 { + if sym := frameSymbol(pc); sym.ok { + fn := newFuncForPC(pc, sym) + cacheFuncForPC(pc, fn) + return fn + } + } if sym, ok := funcPCFrameForPC(pc); ok { fn := newFuncForPC(pc, sym) cacheFuncForPC(pc, fn) diff --git a/runtime/internal/lib/runtime/symtab.go b/runtime/internal/lib/runtime/symtab.go index 0d425ecf0a..bdfebb167d 100644 --- a/runtime/internal/lib/runtime/symtab.go +++ b/runtime/internal/lib/runtime/symtab.go @@ -222,9 +222,6 @@ var runtimePCLineFrames []runtimePCLineFrame type runtimeFuncPCFrame struct { entry uintptr funcIndex uint32 - function string - file string - startLine int } type runtimePCPageIndex struct { @@ -405,6 +402,34 @@ func funcInfoJoinName(pkgID, nameID uint16) string { return string(buf) } +func funcInfoNameLen(pkgID, nameID uint16) int { + pkgLen := cStringLen(funcInfoCString(pkgID)) + nameLen := cStringLen(funcInfoCString(nameID)) + if pkgLen == 0 { + return nameLen + } + if nameLen == 0 { + return pkgLen + } + return pkgLen + 1 + nameLen +} + +func appendFuncInfoName(dst []byte, pkgID, nameID uint16) []byte { + pkg := funcInfoCString(pkgID) + name := funcInfoCString(nameID) + pkgLen := cStringLen(pkg) + nameLen := cStringLen(name) + if pkgLen == 0 { + return cStringAppend(dst, name) + } + if nameLen == 0 { + return cStringAppend(dst, pkg) + } + dst = cStringAppend(dst, pkg) + dst = append(dst, '.') + 
return cStringAppend(dst, name) +} + func funcInfoJoinFile(rootID, nameID uint16) string { root := funcInfoCString(rootID) name := funcInfoCString(nameID) @@ -426,6 +451,53 @@ func funcInfoPackedFile(file uint32) string { return funcInfoJoinFile(uint16(file>>16), uint16(file)) } +func maxFuncInfoSymbolLen() int { + maxLen := 0 + for i := uintptr(0); i < runtimeFuncInfoCount; i++ { + fn := funcInfoAt(i) + if n := funcInfoNameLen(fn.symbolPkg, fn.symbolName); n > maxLen { + maxLen = n + } + } + return maxLen +} + +func symbolPCBytes(name []byte) uintptr { + if len(name) == 0 { + return 0 + } + name = append(name, 0) + return uintptr(clitedebug.Symbol((*c.Char)(unsafe.Pointer(&name[0])))) +} + +func symbolPCFuncInfoName(buf []byte, pkgID, nameID uint16) uintptr { + name := appendFuncInfoName(buf[:0], pkgID, nameID) + return symbolPCBytes(name) +} + +func symbolPCPrefixedFuncInfoName(buf []byte, prefix string, pkgID, nameID uint16) uintptr { + name := append(buf[:0], prefix...) + name = appendFuncInfoName(name, pkgID, nameID) + return symbolPCBytes(name) +} + +func funcInfoFunctionName(fn *runtimeFuncInfoRecord) string { + if fn == nil { + return "" + } + if function := publicFunctionName(funcInfoJoinName(fn.namePkg, fn.nameName)); function != "" { + return function + } + return publicFunctionName(funcInfoJoinName(fn.symbolPkg, fn.symbolName)) +} + +func funcInfoFileName(fn *runtimeFuncInfoRecord) string { + if fn == nil { + return "" + } + return funcInfoJoinFile(fn.fileRoot, fn.fileName) +} + func funcInfoForSymbol(symbol string) *runtimeFuncInfoRecord { if symbol == "" || runtimeFuncInfoTable == nil || runtimeFuncInfoCount == 0 { return nil @@ -582,24 +654,17 @@ func initRuntimeFuncPCFramesOnce() { } frames := make([]runtimeFuncPCFrame, 0, runtimeFuncInfoCount) entries := make([]uintptr, runtimeFuncInfoCount+1) + symbolBuf := make([]byte, 0, maxFuncInfoSymbolLen()+len(runtimeClosureStubPrefix)+1) for i := uintptr(0); i < runtimeFuncInfoCount; i++ { fn := 
funcInfoAt(i) - pc := symbolPC(funcInfoJoinName(fn.symbolPkg, fn.symbolName)) + pc := symbolPCFuncInfoName(symbolBuf, fn.symbolPkg, fn.symbolName) if pc == 0 { continue } index := uint32(i + 1) - function := publicFunctionName(funcInfoJoinName(fn.namePkg, fn.nameName)) - if function == "" { - function = publicFunctionName(funcInfoJoinName(fn.symbolPkg, fn.symbolName)) - } - file := funcInfoJoinFile(fn.fileRoot, fn.fileName) frames = append(frames, runtimeFuncPCFrame{ entry: pc, funcIndex: index, - function: function, - file: file, - startLine: int(fn.line), }) if entries[index] == 0 || pc < entries[index] { entries[index] = pc @@ -617,24 +682,13 @@ func initRuntimeFuncPCFramesOnce() { continue } fn := funcInfoAt(uintptr(index) - 1) - symbol := funcInfoJoinName(fn.symbolPkg, fn.symbolName) - if symbol == "" { - continue - } - pc := symbolPC(runtimeClosureStubPrefix + symbol) + pc := symbolPCPrefixedFuncInfoName(symbolBuf, runtimeClosureStubPrefix, fn.symbolPkg, fn.symbolName) if pc == 0 { continue } - function := publicFunctionName(funcInfoJoinName(fn.namePkg, fn.nameName)) - if function == "" { - function = publicFunctionName(symbol) - } frames = append(frames, runtimeFuncPCFrame{ entry: pc, funcIndex: index, - function: function, - file: funcInfoJoinFile(fn.fileRoot, fn.fileName), - startLine: int(fn.line), }) } } @@ -668,18 +722,9 @@ func appendRuntimeFuncInfoStubSiteFrames(frames []runtimeFuncPCFrame) []runtimeF if funcIndex == 0 || uintptr(funcIndex) > runtimeFuncInfoCount { continue } - fn := funcInfoAt(uintptr(funcIndex) - 1) - symbol := funcInfoJoinName(fn.symbolPkg, fn.symbolName) - function := publicFunctionName(funcInfoJoinName(fn.namePkg, fn.nameName)) - if function == "" { - function = publicFunctionName(symbol) - } frames = append(frames, runtimeFuncPCFrame{ entry: site.pc, funcIndex: funcIndex, - function: function, - file: funcInfoJoinFile(fn.fileRoot, fn.fileName), - startLine: int(fn.line), }) } return frames @@ -885,13 +930,18 @@ func 
funcPCFrameForPC(pc uintptr) (pcSymbol, bool) { return pcSymbol{}, false } frame := runtimeFuncPCFrames[idx] + if frame.funcIndex == 0 || uintptr(frame.funcIndex) > runtimeFuncInfoCount { + return pcSymbol{}, false + } + fn := funcInfoAt(uintptr(frame.funcIndex) - 1) + line := int(fn.line) return pcSymbol{ pc: pc, entry: frame.entry, - function: frame.function, - file: frame.file, - line: frame.startLine, - startLine: frame.startLine, + function: funcInfoFunctionName(fn), + file: funcInfoFileName(fn), + line: line, + startLine: line, ok: true, }, true } @@ -945,6 +995,7 @@ func initRuntimePCLineFramesOnce() { return } frames := make([]runtimePCLineFrame, 0, nsite) + symbolBuf := make([]byte, 0, maxFuncInfoSymbolLen()+1) for i := uintptr(0); i < nsite; i++ { site := (*runtimePCSiteRecord)(unsafe.Pointer(start + i*size)) if site == nil || site.id == 0 || site.pc == 0 { @@ -958,7 +1009,7 @@ func initRuntimePCLineFramesOnce() { fn := funcInfoAt(uintptr(rec.funcIndex) - 1) entry := funcEntryForIndex(rec.funcIndex) if entry == 0 { - entry = symbolPC(funcInfoJoinName(fn.symbolPkg, fn.symbolName)) + entry = symbolPCFuncInfoName(symbolBuf, fn.symbolPkg, fn.symbolName) } if entry == 0 { sym := addrInfoSymbol(pc) From 34b274c16c2e1fca4fe7bc538cfd5e29f560126d Mon Sep 17 00:00:00 2001 From: Li Jie Date: Wed, 1 Jul 2026 17:21:54 +0800 Subject: [PATCH 17/59] test: cover runtime caller metadata edges --- cl/caller_frame_test.go | 219 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 219 insertions(+) diff --git a/cl/caller_frame_test.go b/cl/caller_frame_test.go index 00bad87fed..b2a63fe47b 100644 --- a/cl/caller_frame_test.go +++ b/cl/caller_frame_test.go @@ -5,6 +5,7 @@ package cl import ( "go/ast" + "go/importer" "go/parser" "go/token" "go/types" @@ -129,6 +130,34 @@ func buildCallerFrameSSAPackage(t *testing.T, pkgPath, src string) (*gossa.Packa return ssapkg, files } +func newLLSSAProgForTarget(t *testing.T, target *llssa.Target) llssa.Program { + t.Helper() + prog := 
llssa.NewProgram(target) + prog.SetRuntime(func() *types.Package { + rt, err := importer.For("source", nil).Import(llssa.PkgRuntime) + if err != nil { + t.Fatal("load runtime failed:", err) + } + return rt + }) + if target != nil && target.GOARCH != "" { + prog.TypeSizes(types.SizesFor("gc", target.GOARCH)) + } + return prog +} + +func newRuntimeCallerAnalysis(pkg *gossa.Package) *runtimeCallerAnalysis { + funcs, trackable := collectRuntimeCallerFunctions(pkg) + return &runtimeCallerAnalysis{ + pkg: pkg, + funcs: funcs, + trackable: trackable, + callsites: collectRuntimeCallerCallsites(funcs), + memo: make(map[*gossa.Function]bool), + visiting: make(map[*gossa.Function]bool), + } +} + func TestRuntimeCallerPackageDetection(t *testing.T) { ssapkg, _ := buildCallerFrameSSAPackage(t, "example.com/foo", `package foo import "runtime" @@ -215,6 +244,127 @@ func FuncForPC(pc uintptr) uintptr { return 0 } } } +func TestRuntimeCallerAnalysisEdgeCases(t *testing.T) { + if fnUsesRuntimeCaller(nil) { + t.Fatal("nil function should not use runtime caller metadata") + } + if fnUsesRuntimeCaller(&gossa.Function{}) { + t.Fatal("function without a package should not use runtime caller metadata") + } + if runtimeCallerFuncSet(nil) != nil { + t.Fatal("nil package should have no runtime caller set") + } + if fnHasDirectRuntimeCaller(nil) { + t.Fatal("nil function should not have direct runtime caller use") + } + if functionBelongsToPackage(nil, nil) { + t.Fatal("nil function/package should not belong to a package") + } + if typeBelongsToPackage(types.Typ[types.Int], nil) { + t.Fatal("types should not belong to a nil package") + } + if isRuntimeCallerLookupFunc(nil) { + t.Fatal("nil function should not be a runtime caller lookup") + } + called := false + forEachCall(nil, func(*gossa.CallCommon) { + called = true + }) + if called { + t.Fatal("forEachCall should ignore nil functions") + } + + ssapkg, _ := buildCallerFrameSSAPackage(t, "example.com/foo", `package foo +import "runtime" + 
+type I interface { Call() } +type J interface { Call() } +type T struct{} + +func target() { runtime.Caller(0) } +func plain() {} +func call(fn func()) { fn() } +func callRuntime() { call(target) } +func (T) Call() { runtime.Caller(0) } +func viaStatic() { var i I = T{}; i.Call() } +func viaChange(j J) { var i I = j; i.Call() } +func viaParam(i I) { i.Call() } +func passInterface() { var i I = T{}; viaParam(i) } +`) + analysis := newRuntimeCallerAnalysis(ssapkg) + if analysis.fnMayReachRuntimeCaller(nil) { + t.Fatal("nil function should not reach runtime caller metadata") + } + if targets, ok := analysis.functionValueTargets(ssapkg.Func("callRuntime"), ssapkg.Func("target")); !ok || !targets[ssapkg.Func("target")] { + t.Fatal("static function value should resolve to its target") + } + if _, ok := analysis.functionValueTargets(ssapkg.Func("target"), nil); ok { + t.Fatal("nil function value should be unresolved") + } + if _, ok := analysis.functionParamTargets(ssapkg.Func("call"), 99); ok { + t.Fatal("out-of-range function argument should be unresolved") + } + callFn := ssapkg.Func("call") + callParam := callFn.Params[0] + callParams := callFn.Params + callFn.Params = nil + if _, ok := analysis.functionValueTargets(callFn, callParam); ok { + t.Fatal("function parameter missing from Params should be unresolved") + } + callFn.Params = callParams + + iface := ssapkg.Pkg.Scope().Lookup("I").Type().Underlying().(*types.Interface) + method := iface.Method(0) + if !analysis.fnMayReachRuntimeCaller(ssapkg.Func("viaStatic")) { + t.Fatal("static interface dispatch should reach runtime caller metadata") + } + if !analysis.fnMayReachRuntimeCaller(ssapkg.Func("viaChange")) { + t.Fatal("changed interface dispatch should conservatively reach runtime caller metadata") + } + if targets, ok := analysis.interfaceMethodTargets(ssapkg.Func("viaParam"), ssapkg.Func("viaParam").Params[0], method); !ok || len(targets) == 0 { + t.Fatal("interface parameter callsites should resolve concrete 
method targets") + } + analysis.callsites[ssapkg.Func("viaParam")] = []*gossa.CallCommon{{}} + if _, ok := analysis.interfaceMethodTargets(ssapkg.Func("viaParam"), ssapkg.Func("viaParam").Params[0], method); ok { + t.Fatal("out-of-range interface argument should be unresolved") + } + if _, ok := analysis.interfaceMethodTargets(ssapkg.Func("viaStatic"), nil, method); ok { + t.Fatal("nil interface receiver should be unresolved") + } + if _, ok := analysis.staticInterfaceMethodTargets(&gossa.ChangeInterface{}, method); ok { + t.Fatal("empty interface conversion should be unresolved") + } + viaParam := ssapkg.Func("viaParam") + interfaceParam := viaParam.Params[0] + interfaceParams := viaParam.Params + viaParam.Params = nil + if _, ok := analysis.interfaceMethodTargets(viaParam, interfaceParam, method); ok { + t.Fatal("interface parameter missing from Params should be unresolved") + } + viaParam.Params = interfaceParams + if _, ok := analysis.methodTargetsForType(nil, nil); ok { + t.Fatal("nil method lookup should be unresolved") + } + other := types.NewFunc(token.NoPos, ssapkg.Pkg, "Other", nil) + if _, ok := analysis.methodTargetsForType(ssapkg.Type("T").Type(), other); ok { + t.Fatal("missing interface method should be unresolved") + } + if idx, ok := parameterIndex(ssapkg.Func("target"), nil); ok || idx != 0 { + t.Fatal("nil parameter should not be found") + } + + methodOnlyPkg, _ := buildCallerFrameSSAPackage(t, "example.com/methodonly", `package methodonly +import "runtime" + +type T struct{} +func (T) Call() { runtime.Caller(0) } +var _ = T{} +`) + if runtimeCallerFuncSet(methodOnlyPkg) != nil { + t.Fatal("method-only runtime caller use should not mark top-level functions") + } +} + func TestCallerFrameTrackingEligibility(t *testing.T) { if (&context{}).shouldTrackCallerFrames() { t.Fatal("missing compiler state should not track caller frames") @@ -380,6 +530,31 @@ func leaf() {} } } +func TestCompileRuntimeCallerPCLineMetadata32Bit(t *testing.T) { + ssapkg, 
files := buildCallerFrameSSAPackage(t, "example.com/foo", `package foo +import "runtime" + +func top() { + runtime.Caller(0) +} +`) + prog := newLLSSAProgForTarget(t, &llssa.Target{GOOS: "linux", GOARCH: "386"}) + prog.EnableFuncInfoMetadata(true) + pkg, err := NewPackage(prog, ssapkg, files) + if err != nil { + t.Fatal(err) + } + ir := pkg.Module().String() + for _, want := range []string{ + `.p2align 2`, + `.long __llgo_pcsite_`, + } { + if !strings.Contains(ir, want) { + t.Fatalf("missing 32-bit pcline asm %q:\n%s", want, ir) + } + } +} + func TestCompileRuntimeCallerPCLineEscapesDollarInInlineAsm(t *testing.T) { ssapkg, files := buildCallerFrameSSAPackage(t, "example.com/foo", `package foo import "runtime" @@ -412,6 +587,50 @@ func top() { } } +func TestRuntimeCallerInstrumentationEdgeCases(t *testing.T) { + ssapkg, _ := buildCallerFrameSSAPackage(t, "example.com/foo", `package foo +import "runtime" + +func top() { + runtime.Caller(0) +} +`) + prog := newLLSSAProgForTarget(t, &llssa.Target{GOOS: "linux", GOARCH: "amd64"}) + prog.EnableFuncInfoMetadata(true) + pkg := prog.NewPackage("foo", "example.com/foo") + fn := pkg.NewFunc("example.com/foo.top", llssa.NoArgsNoRet, llssa.InGo) + ctx := &context{ + prog: prog, + pkg: pkg, + fn: fn, + goFn: ssapkg.Func("top"), + fset: token.NewFileSet(), + trackCallerFrames: true, + runtimeCallerFuncs: runtimeCallerFuncSet(ssapkg), + } + var b llssa.Builder + ctx.pushCallerLocationFrame(b, nil) + ctx.recordRuntimeLocation(b, token.NoPos, "RecordCallerLocation") + ctx.emitPCLineLabel(b, token.NoPos) + ctx.popCallerLocationFrame(b) + + if pos := (&context{}).funcInfoPosition(nil); pos.IsValid() { + t.Fatal("nil function should have no funcinfo position") + } + if canEmitPCLineLabelsForTarget(nil) { + t.Fatal("nil target should not emit pc-line labels") + } + if canEmitPCLineLabelsForTarget(&llssa.Target{GOOS: "linux", GOARCH: "wasm"}) { + t.Fatal("wasm target should not emit pc-line labels") + } + if 
canEmitPCLineLabelsForTarget(&llssa.Target{GOOS: "linux", GOARCH: "amd64", Target: "esp32"}) { + t.Fatal("named target should not emit pc-line labels") + } + if got, want := asmQuoteSymbol(`a\b"c$d`), `"a\\b\"c$$d"`; got != want { + t.Fatalf("asmQuoteSymbol() = %q, want %q", got, want) + } +} + func TestCompileRuntimeCallerPCLineMetadataSkippedOnDarwin(t *testing.T) { ssapkg, files := buildCallerFrameSSAPackage(t, "example.com/foo", `package foo import "runtime" From 3b87c907c9e26a169f146bcf0e461a1769936f19 Mon Sep 17 00:00:00 2001 From: Li Jie Date: Wed, 1 Jul 2026 21:24:18 +0800 Subject: [PATCH 18/59] runtime: speed up funcinfo entry lookup --- internal/build/build.go | 1 + internal/build/funcinfo_table.go | 107 ++++++++++++- internal/build/funcinfo_table_test.go | 99 +++++++++++- .../lib/runtime/pprof_runtime_stub_llgo.go | 16 ++ runtime/internal/lib/runtime/symtab.go | 151 ++++++++++++++---- 5 files changed, 336 insertions(+), 38 deletions(-) diff --git a/internal/build/build.go b/internal/build/build.go index 97c68389dc..f5a7c87f8a 100644 --- a/internal/build/build.go +++ b/internal/build/build.go @@ -1348,6 +1348,7 @@ func buildPkg(ctx *context, aPkg *aPackage, verbose bool) error { return fmt.Errorf("run LLVM passes failed for %v: %v", pkgPath, err) } } + emitFuncInfoEntrySites(ctx, ret) emitFuncInfoStubSites(ctx, ret) printCmds := ctx.shouldPrintCommands(verbose) diff --git a/internal/build/funcinfo_table.go b/internal/build/funcinfo_table.go index f7ffc57900..c7a6d1498c 100644 --- a/internal/build/funcinfo_table.go +++ b/internal/build/funcinfo_table.go @@ -36,12 +36,16 @@ const ( funcInfoHashMaskSymbol = "__llgo_funcinfo_hash_mask" funcInfoStubIndexesSymbol = "__llgo_funcinfo_stub_indexes" funcInfoStubCountSymbol = "__llgo_funcinfo_stub_count" + funcInfoEntryStartPtrSymbol = "__llgo_funcinfo_entry_start" + funcInfoEntryEndPtrSymbol = "__llgo_funcinfo_entry_end" funcInfoStubSiteStartPtrSymbol = "__llgo_funcinfo_stubsite_start" funcInfoStubSiteEndPtrSymbol 
= "__llgo_funcinfo_stubsite_end" pcLineTableSymbol = "__llgo_pcline_table" pcLineCountSymbol = "__llgo_pcline_count" pcSiteStartPtrSymbol = "__llgo_pcsite_start" pcSiteEndPtrSymbol = "__llgo_pcsite_end" + funcInfoEntryStartSymbol = "__start_llgo_funcinfo_entry" + funcInfoEntryEndSymbol = "__stop_llgo_funcinfo_entry" funcInfoStubSiteStartSymbol = "__start_llgo_funcinfo_stubsite" funcInfoStubSiteEndSymbol = "__stop_llgo_funcinfo_stubsite" pcSiteStartSymbol = "__start_llgo_pcline" @@ -274,6 +278,10 @@ func emitFuncInfoTable(ctx *context, pkg llssa.Package, records []funcInfoRecord i32Type, i32Type, }, false) + funcEntryRecordType := llvmCtx.StructType([]llvm.Type{ + llvm.PointerType(i8Type, 0), + i64Type, + }, false) stubSiteRecordType := llvmCtx.StructType([]llvm.Type{ llvm.PointerType(i8Type, 0), i64Type, @@ -287,6 +295,8 @@ func emitFuncInfoTable(ctx *context, pkg llssa.Package, records []funcInfoRecord pcLinePtr := llvm.AddGlobal(mod, llvm.PointerType(pcLineRecordType, 0), pcLineTableSymbol) pcSiteStartPtr := llvm.AddGlobal(mod, llvm.PointerType(pcSiteRecordType, 0), pcSiteStartPtrSymbol) pcSiteEndPtr := llvm.AddGlobal(mod, llvm.PointerType(pcSiteRecordType, 0), pcSiteEndPtrSymbol) + entryStartPtr := llvm.AddGlobal(mod, llvm.PointerType(funcEntryRecordType, 0), funcInfoEntryStartPtrSymbol) + entryEndPtr := llvm.AddGlobal(mod, llvm.PointerType(funcEntryRecordType, 0), funcInfoEntryEndPtrSymbol) stubSiteStartPtr := llvm.AddGlobal(mod, llvm.PointerType(stubSiteRecordType, 0), funcInfoStubSiteStartPtrSymbol) stubSiteEndPtr := llvm.AddGlobal(mod, llvm.PointerType(stubSiteRecordType, 0), funcInfoStubSiteEndPtrSymbol) stringsPtr := llvm.AddGlobal(mod, llvm.PointerType(i8Type, 0), funcInfoStringsSymbol) @@ -303,6 +313,8 @@ func emitFuncInfoTable(ctx *context, pkg llssa.Package, records []funcInfoRecord pcLinePtr.SetInitializer(llvm.ConstPointerNull(pcLinePtr.GlobalValueType())) pcSiteStartPtr.SetInitializer(llvm.ConstPointerNull(pcSiteStartPtr.GlobalValueType())) 
pcSiteEndPtr.SetInitializer(llvm.ConstPointerNull(pcSiteEndPtr.GlobalValueType())) + entryStartPtr.SetInitializer(llvm.ConstPointerNull(entryStartPtr.GlobalValueType())) + entryEndPtr.SetInitializer(llvm.ConstPointerNull(entryEndPtr.GlobalValueType())) stubSiteStartPtr.SetInitializer(llvm.ConstPointerNull(stubSiteStartPtr.GlobalValueType())) stubSiteEndPtr.SetInitializer(llvm.ConstPointerNull(stubSiteEndPtr.GlobalValueType())) stringsPtr.SetInitializer(llvm.ConstPointerNull(stringsPtr.GlobalValueType())) @@ -326,6 +338,8 @@ func emitFuncInfoTable(ctx *context, pkg llssa.Package, records []funcInfoRecord pcLinePtr.SetInitializer(llvm.ConstPointerNull(pcLinePtr.GlobalValueType())) pcSiteStartPtr.SetInitializer(llvm.ConstPointerNull(pcSiteStartPtr.GlobalValueType())) pcSiteEndPtr.SetInitializer(llvm.ConstPointerNull(pcSiteEndPtr.GlobalValueType())) + entryStartPtr.SetInitializer(llvm.ConstPointerNull(entryStartPtr.GlobalValueType())) + entryEndPtr.SetInitializer(llvm.ConstPointerNull(entryEndPtr.GlobalValueType())) stubSiteStartPtr.SetInitializer(llvm.ConstPointerNull(stubSiteStartPtr.GlobalValueType())) stubSiteEndPtr.SetInitializer(llvm.ConstPointerNull(stubSiteEndPtr.GlobalValueType())) stringsPtr.SetInitializer(llvm.ConstPointerNull(stringsPtr.GlobalValueType())) @@ -398,8 +412,18 @@ func emitFuncInfoTable(ctx *context, pkg llssa.Package, records []funcInfoRecord } } emitELFSites := shouldEmitRuntimeELFSites(ctx) + emitEntrySites := shouldEmitRuntimeEntryELFSites(ctx) && len(encoded.Records) != 0 emitStubSites := shouldEmitRuntimeStubELFSites(ctx) - emitRuntimeFuncInfoELFSites(mod, ctx.prog.PointerSize(), emitELFSites && len(pcLineValues) != 0, emitStubSites && len(stubRecords) != 0) + emitRuntimeFuncInfoELFSites(mod, ctx.prog.PointerSize(), emitELFSites && len(pcLineValues) != 0, emitEntrySites, emitStubSites && len(stubRecords) != 0) + if emitEntrySites { + entryStart := llvm.AddGlobal(mod, funcEntryRecordType, funcInfoEntryStartSymbol) + entryEnd := 
llvm.AddGlobal(mod, funcEntryRecordType, funcInfoEntryEndSymbol) + entryStartPtr.SetInitializer(entryStart) + entryEndPtr.SetInitializer(entryEnd) + } else { + entryStartPtr.SetInitializer(llvm.ConstPointerNull(entryStartPtr.GlobalValueType())) + entryEndPtr.SetInitializer(llvm.ConstPointerNull(entryEndPtr.GlobalValueType())) + } if emitStubSites && len(stubRecords) != 0 { stubSiteStart := llvm.AddGlobal(mod, stubSiteRecordType, funcInfoStubSiteStartSymbol) stubSiteEnd := llvm.AddGlobal(mod, stubSiteRecordType, funcInfoStubSiteEndSymbol) @@ -505,7 +529,70 @@ func shouldEmitRuntimeELFSites(ctx *context) bool { } func shouldEmitRuntimeStubELFSites(ctx *context) bool { - return shouldEmitRuntimeELFSites(ctx) && !ctx.buildConf.ltoEnabled() + return shouldEmitRuntimeELFSites(ctx) +} + +func shouldEmitRuntimeEntryELFSites(ctx *context) bool { + return shouldEmitRuntimeELFSites(ctx) +} + +func emitFuncInfoEntrySites(ctx *context, pkg llssa.Package) { + if !shouldEmitRuntimeEntryELFSites(ctx) || pkg == nil || !ctx.prog.FuncInfoMetadataEnabled() { + return + } + mod := pkg.Module() + records := readFuncInfo(mod) + if len(records) == 0 { + return + } + symbolIDs := make(map[string]uint64, len(records)) + for _, rec := range records { + if rec.symbol != "" { + symbolIDs[rec.symbol] = funcInfoSymbolID(rec.symbol) + } + } + if len(symbolIDs) == 0 { + return + } + llvmCtx := mod.Context() + builder := llvmCtx.NewBuilder() + defer builder.Dispose() + asmType := llvm.FunctionType(llvmCtx.VoidType(), nil, false) + ptrDirective := ".quad" + align := "3" + if ctx.prog.PointerSize() == 4 { + ptrDirective = ".long" + align = "2" + } + for fn := mod.FirstFunction(); !fn.IsNil(); fn = llvm.NextFunction(fn) { + if fn.IsDeclaration() || fn.BasicBlocksCount() == 0 { + continue + } + symbol := fn.Name() + symbolID := symbolIDs[symbol] + if symbolID == 0 { + continue + } + entry := fn.EntryBasicBlock() + if entry.IsNil() { + continue + } + first := entry.FirstInstruction() + if first.IsNil() 
{ + builder.SetInsertPointAtEnd(entry) + } else { + builder.SetInsertPointBefore(first) + } + anchor := ".Lllgo_funcinfo_entry_anchor_${:uid}" + instruction := anchor + ":\n" + + ".pushsection llgo_funcinfo_entry,\"ao\",@progbits," + anchor + "\n" + + ".p2align " + align + "\n" + + ptrDirective + " " + anchor + "\n" + + ".quad " + uint64Hex(symbolID) + "\n" + + ".popsection" + asm := llvm.InlineAsm(asmType, instruction, "", true, false, llvm.InlineAsmDialectATT, false) + builder.CreateCall(asmType, asm, nil, "") + } } func emitFuncInfoStubSites(ctx *context, pkg llssa.Package) { @@ -542,9 +629,11 @@ func emitFuncInfoStubSites(ctx *context, pkg llssa.Package) { } else { builder.SetInsertPointBefore(first) } - instruction := ".pushsection llgo_funcinfo_stubsite,\"ao\",@progbits," + asmQuoteELFSymbol(symbol) + "\n" + + anchor := ".Lllgo_funcinfo_stubsite_anchor_${:uid}" + instruction := anchor + ":\n" + + ".pushsection llgo_funcinfo_stubsite,\"ao\",@progbits," + anchor + "\n" + ".p2align " + align + "\n" + - ptrDirective + " " + asmQuoteELFSymbol(symbol) + "\n" + + ptrDirective + " " + anchor + "\n" + ".quad " + uint64Hex(funcInfoSymbolID(target)) + "\n" + ".popsection" asm := llvm.InlineAsm(asmType, instruction, "", true, false, llvm.InlineAsmDialectATT, false) @@ -580,8 +669,8 @@ func uint64Hex(v uint64) string { return string(buf[:]) } -func emitRuntimeFuncInfoELFSites(mod llvm.Module, pointerSize int, pcSite bool, stubSite bool) { - if !pcSite && !stubSite { +func emitRuntimeFuncInfoELFSites(mod llvm.Module, pointerSize int, pcSite bool, entrySite bool, stubSite bool) { + if !pcSite && !entrySite && !stubSite { return } ptrDirective := ".quad" @@ -597,6 +686,12 @@ func emitRuntimeFuncInfoELFSites(mod llvm.Module, pointerSize int, pcSite bool, asm.WriteString(ptrDirective + " 0\n") asm.WriteString(".quad 0\n") } + if entrySite { + asm.WriteString(".section llgo_funcinfo_entry,\"aR\",@progbits\n") + asm.WriteString(".p2align " + align + "\n") + 
asm.WriteString(ptrDirective + " 0\n") + asm.WriteString(".quad 0\n") + } if stubSite { asm.WriteString(".section llgo_funcinfo_stubsite,\"aR\",@progbits\n") asm.WriteString(".p2align " + align + "\n") diff --git a/internal/build/funcinfo_table_test.go b/internal/build/funcinfo_table_test.go index 9ed6739cad..1c7e3cb3aa 100644 --- a/internal/build/funcinfo_table_test.go +++ b/internal/build/funcinfo_table_test.go @@ -64,10 +64,13 @@ func TestFuncInfoTableMaterializesMetadataWithoutFunctionPointers(t *testing.T) "@__llgo_funcinfo_string_count = global i64 5", "@__llgo_funcinfo_hash = global ptr", "@__llgo_funcinfo_count = global i64 1", + "@__llgo_funcinfo_entry_start = global ptr @__start_llgo_funcinfo_entry", + "@__llgo_funcinfo_entry_end = global ptr @__stop_llgo_funcinfo_entry", "@__llgo_funcinfo_stub_indexes = global ptr null", "@__llgo_funcinfo_stub_count = global i64 0", "@__llgo_pcline_count = global i64 0", "@__llgo_funcinfo_hash_mask = global i64 1", + "module asm \".section llgo_funcinfo_entry", `@"__llgo_funcinfo_table$data" = private unnamed_addr constant [1 x { i16, i16, i16, i16, i16, i16, i32 }]`, `@"__llgo_funcinfo_string_offsets$data" = private unnamed_addr constant`, `@"__llgo_funcinfo_hash$data" = private unnamed_addr constant [2 x i16]`, @@ -86,6 +89,88 @@ func TestFuncInfoTableMaterializesMetadataWithoutFunctionPointers(t *testing.T) } } +func TestFuncInfoTableMaterializesEntrySites(t *testing.T) { + prog := llssa.NewProgram(nil) + src := prog.NewPackage("example.com/p", "example.com/p") + src.EmitFuncInfo("example.com/p.live", "example.com/p.Live", "live.go", 17, 3) + src.EmitFuncInfo("example.com/p.missing", "example.com/p.Missing", "missing.go", 19, 1) + liveFn := src.NewFunc("example.com/p.live", llssa.NoArgsNoRet, llssa.InC) + liveFn.MakeBody(1).Return() + otherFn := src.NewFunc("example.com/p.other", llssa.NoArgsNoRet, llssa.InC) + otherFn.MakeBody(1).Return() + ctx := &context{ + prog: prog, + buildConf: &Config{ + BuildMode: 
BuildModeExe, + Goos: "linux", + Goarch: "amd64", + }, + } + prog.EnableFuncInfoMetadata(true) + emitFuncInfoEntrySites(ctx, src) + srcIR := src.String() + for _, want := range []string{ + "call void asm sideeffect", + ".pushsection llgo_funcinfo_entry", + ".Lllgo_funcinfo_entry_anchor_", + ".quad .Lllgo_funcinfo_entry_anchor_", + ".quad 0x", + } { + if !strings.Contains(srcIR, want) { + t.Fatalf("package entry site IR missing %q:\n%s", want, srcIR) + } + } + for _, bad := range []string{ + `.quad \22example.com/p.live\22`, + `.quad \22example.com/p.other\22`, + `.quad \22example.com/p.missing\22`, + } { + if strings.Contains(srcIR, bad) { + t.Fatalf("package entry site IR should not contain %q:\n%s", bad, srcIR) + } + } + + records := collectFuncInfo([]Package{{LPkg: src}}) + entry := genMainModule(ctx, llssa.PkgRuntime, &packages.Package{ + PkgPath: "example.com/main", + ExportFile: "main.a", + }, &genConfig{funcInfo: records}) + ir := entry.LPkg.String() + for _, want := range []string{ + "@__llgo_funcinfo_entry_start = global ptr @__start_llgo_funcinfo_entry", + "@__llgo_funcinfo_entry_end = global ptr @__stop_llgo_funcinfo_entry", + "module asm \".section llgo_funcinfo_entry", + } { + if !strings.Contains(ir, want) { + t.Fatalf("funcinfo entry table IR missing %q:\n%s", want, ir) + } + } + + ltoCtx := &context{ + prog: prog, + buildConf: &Config{ + BuildMode: BuildModeExe, + Goos: "linux", + Goarch: "amd64", + LTO: lto.Full, + }, + } + ltoEntry := genMainModule(ltoCtx, llssa.PkgRuntime, &packages.Package{ + PkgPath: "example.com/main", + ExportFile: "main.a", + }, &genConfig{funcInfo: records}) + ltoIR := ltoEntry.LPkg.String() + for _, want := range []string{ + "@__llgo_funcinfo_entry_start = global ptr @__start_llgo_funcinfo_entry", + "@__llgo_funcinfo_entry_end = global ptr @__stop_llgo_funcinfo_entry", + "module asm \".section llgo_funcinfo_entry", + } { + if !strings.Contains(ltoIR, want) { + t.Fatalf("full LTO funcinfo table IR missing %q:\n%s", want, 
ltoIR) + } + } +} + func TestFuncInfoTableMaterializesClosureStubIndexes(t *testing.T) { prog := llssa.NewProgram(nil) src := prog.NewPackage("example.com/p", "example.com/p") @@ -107,13 +192,17 @@ func TestFuncInfoTableMaterializesClosureStubIndexes(t *testing.T) { for _, want := range []string{ "call void asm sideeffect", ".pushsection llgo_funcinfo_stubsite", - `.quad \22__llgo_stub.example.com/p.live\22`, + ".Lllgo_funcinfo_stubsite_anchor_", + ".quad .Lllgo_funcinfo_stubsite_anchor_", ".quad 0x", } { if !strings.Contains(srcIR, want) { t.Fatalf("package stub site IR missing %q:\n%s", want, srcIR) } } + if strings.Contains(srcIR, `.quad \22__llgo_stub.example.com/p.live\22`) { + t.Fatalf("package stub site must not reference stub function symbols:\n%s", srcIR) + } records := collectFuncInfo([]Package{{LPkg: src}}) stubs := collectFuncInfoStubRecords([]Package{{LPkg: src}}, records) @@ -159,13 +248,13 @@ func TestFuncInfoTableMaterializesClosureStubIndexes(t *testing.T) { ExportFile: "main.a", }, &genConfig{funcInfo: records, funcInfoStubs: stubs}) ltoIR := ltoEntry.LPkg.String() - for _, bad := range []string{ + for _, want := range []string{ "@__llgo_funcinfo_stubsite_start = global ptr @__start_llgo_funcinfo_stubsite", "@__llgo_funcinfo_stubsite_end = global ptr @__stop_llgo_funcinfo_stubsite", "module asm \".section llgo_funcinfo_stubsite", } { - if strings.Contains(ltoIR, bad) { - t.Fatalf("full LTO funcinfo table should not emit stub site %q:\n%s", bad, ltoIR) + if !strings.Contains(ltoIR, want) { + t.Fatalf("full LTO funcinfo stub site table IR missing %q:\n%s", want, ltoIR) } } } @@ -291,6 +380,8 @@ func TestFuncInfoTableEmptyDefinitions(t *testing.T) { "@__llgo_funcinfo_string_count = global i64 0", "@__llgo_funcinfo_hash = global ptr null", "@__llgo_funcinfo_count = global i64 0", + "@__llgo_funcinfo_entry_start = global ptr null", + "@__llgo_funcinfo_entry_end = global ptr null", "@__llgo_funcinfo_stub_indexes = global ptr null", 
"@__llgo_funcinfo_stub_count = global i64 0", "@__llgo_pcline_count = global i64 0", diff --git a/runtime/internal/lib/runtime/pprof_runtime_stub_llgo.go b/runtime/internal/lib/runtime/pprof_runtime_stub_llgo.go index a0bae4d160..4548771399 100644 --- a/runtime/internal/lib/runtime/pprof_runtime_stub_llgo.go +++ b/runtime/internal/lib/runtime/pprof_runtime_stub_llgo.go @@ -120,6 +120,22 @@ func funcForPCSlow(pc uintptr) *Func { cacheFuncForPC(pc, fn) return fn } + } else if pc != 0 { + // Function-value PCs point at the real function entry. ELF funcinfo + // entry-site anchors are emitted from LLVM IR and can land after the + // backend prologue, so an exact entry PC may sort before its anchor. + // Prefer native symbol info only when it is an exact entry match; the + // section table below remains the normal fast fallback. + if sym := addrInfoSymbol(pc); sym.ok && sym.entry == pc && sym.function != "" { + fn := newFuncForPC(pc, sym) + cacheFuncForPC(pc, fn) + return fn + } + if sym, ok := funcPCFrameForEntryPC(pc); ok { + fn := newFuncForPC(pc, sym) + cacheFuncForPC(pc, fn) + return fn + } } if sym, ok := funcPCFrameForPC(pc); ok { fn := newFuncForPC(pc, sym) diff --git a/runtime/internal/lib/runtime/symtab.go b/runtime/internal/lib/runtime/symtab.go index bdfebb167d..098c42fbef 100644 --- a/runtime/internal/lib/runtime/symtab.go +++ b/runtime/internal/lib/runtime/symtab.go @@ -172,6 +172,17 @@ var runtimeFuncInfoStubIndexes *uint32 //go:linkname runtimeFuncInfoStubCount __llgo_funcinfo_stub_count var runtimeFuncInfoStubCount uintptr +type runtimeFuncInfoEntryRecord struct { + pc uintptr + symbolID uint64 +} + +//go:linkname runtimeFuncInfoEntryStart __llgo_funcinfo_entry_start +var runtimeFuncInfoEntryStart *runtimeFuncInfoEntryRecord + +//go:linkname runtimeFuncInfoEntryEnd __llgo_funcinfo_entry_end +var runtimeFuncInfoEntryEnd *runtimeFuncInfoEntryRecord + type runtimeFuncInfoStubSiteRecord struct { pc uintptr symbolID uint64 @@ -230,6 +241,7 @@ type 
runtimePCPageIndex struct { } const runtimeFuncPCPageShift = 12 +const runtimeFuncPCEntrySlack = 64 var runtimeFuncPCInitState uint32 var runtimeFuncPCFrames []runtimeFuncPCFrame @@ -654,28 +666,39 @@ func initRuntimeFuncPCFramesOnce() { } frames := make([]runtimeFuncPCFrame, 0, runtimeFuncInfoCount) entries := make([]uintptr, runtimeFuncInfoCount+1) - symbolBuf := make([]byte, 0, maxFuncInfoSymbolLen()+len(runtimeClosureStubPrefix)+1) - for i := uintptr(0); i < runtimeFuncInfoCount; i++ { - fn := funcInfoAt(i) - pc := symbolPCFuncInfoName(symbolBuf, fn.symbolPkg, fn.symbolName) - if pc == 0 { - continue - } - index := uint32(i + 1) - frames = append(frames, runtimeFuncPCFrame{ - entry: pc, - funcIndex: index, - }) - if entries[index] == 0 || pc < entries[index] { - entries[index] = pc + var indexBySymbolID map[uint64]uint32 + if runtimeFuncInfoEntryStart != nil || runtimeFuncInfoStubSiteStart != nil { + indexBySymbolID = funcInfoIndexBySymbolID() + } + frames, usedEntrySites := appendRuntimeFuncInfoEntryFrames(frames, entries, indexBySymbolID) + symbolBuf := []byte(nil) + if !usedEntrySites { + symbolBuf = make([]byte, 0, maxFuncInfoSymbolLen()+len(runtimeClosureStubPrefix)+1) + for i := uintptr(0); i < runtimeFuncInfoCount; i++ { + fn := funcInfoAt(i) + pc := symbolPCFuncInfoName(symbolBuf, fn.symbolPkg, fn.symbolName) + if pc == 0 { + continue + } + index := uint32(i + 1) + frames = append(frames, runtimeFuncPCFrame{ + entry: pc, + funcIndex: index, + }) + if entries[index] == 0 || pc < entries[index] { + entries[index] = pc + } } } - frames = appendRuntimeFuncInfoStubSiteFrames(frames) + frames = appendRuntimeFuncInfoStubSiteFrames(frames, indexBySymbolID) // Closure stubs are an ABI adapter and may go away in a future closure // lowering. Keep the fallback compatibility table light: it stores only // target funcinfo record indexes. On ELF we prefer the associated stub-site // section above because linkers do not expose local stubs through dlsym. 
if runtimeFuncInfoStubIndexes != nil && runtimeFuncInfoStubCount != 0 && runtimeFuncInfoStubCount <= runtimeFuncInfoCount { + if symbolBuf == nil { + symbolBuf = make([]byte, 0, maxFuncInfoSymbolLen()+len(runtimeClosureStubPrefix)+1) + } for i := uintptr(0); i < runtimeFuncInfoStubCount; i++ { index := funcInfoStubIndexAt(i) if index == 0 || uintptr(index) > runtimeFuncInfoCount { @@ -699,7 +722,43 @@ func initRuntimeFuncPCFramesOnce() { runtimeFuncPCIndex = buildRuntimeFuncPCIndex(frames) } -func appendRuntimeFuncInfoStubSiteFrames(frames []runtimeFuncPCFrame) []runtimeFuncPCFrame { +func appendRuntimeFuncInfoEntryFrames(frames []runtimeFuncPCFrame, entries []uintptr, indexBySymbolID map[uint64]uint32) ([]runtimeFuncPCFrame, bool) { + if runtimeFuncInfoEntryStart == nil || runtimeFuncInfoEntryEnd == nil { + return frames, false + } + start := uintptr(unsafe.Pointer(runtimeFuncInfoEntryStart)) + end := uintptr(unsafe.Pointer(runtimeFuncInfoEntryEnd)) + size := unsafe.Sizeof(*runtimeFuncInfoEntryStart) + if end <= start || size == 0 || (end-start)%size != 0 { + return frames, false + } + nsite := (end - start) / size + if nsite > runtimeFuncInfoCount*16 || nsite > 1<<20 { + return frames, false + } + used := false + for i := uintptr(0); i < nsite; i++ { + site := (*runtimeFuncInfoEntryRecord)(unsafe.Pointer(start + i*size)) + if site == nil || site.pc == 0 || site.symbolID == 0 { + continue + } + funcIndex := indexBySymbolID[site.symbolID] + if funcIndex == 0 || uintptr(funcIndex) > runtimeFuncInfoCount { + continue + } + frames = append(frames, runtimeFuncPCFrame{ + entry: site.pc, + funcIndex: funcIndex, + }) + if entries[funcIndex] == 0 || site.pc < entries[funcIndex] { + entries[funcIndex] = site.pc + } + used = true + } + return frames, used +} + +func appendRuntimeFuncInfoStubSiteFrames(frames []runtimeFuncPCFrame, indexBySymbolID map[uint64]uint32) []runtimeFuncPCFrame { if runtimeFuncInfoStubSiteStart == nil || runtimeFuncInfoStubSiteEnd == nil { return 
frames } @@ -718,7 +777,7 @@ func appendRuntimeFuncInfoStubSiteFrames(frames []runtimeFuncPCFrame) []runtimeF if site == nil || site.pc == 0 || site.symbolID == 0 { continue } - funcIndex := funcInfoIndexForSymbolID(site.symbolID) + funcIndex := indexBySymbolID[site.symbolID] if funcIndex == 0 || uintptr(funcIndex) > runtimeFuncInfoCount { continue } @@ -730,17 +789,21 @@ func appendRuntimeFuncInfoStubSiteFrames(frames []runtimeFuncPCFrame) []runtimeF return frames } -func funcInfoIndexForSymbolID(id uint64) uint32 { - if id == 0 || runtimeFuncInfoTable == nil || runtimeFuncInfoCount == 0 { - return 0 - } +func funcInfoIndexBySymbolID() map[uint64]uint32 { + indexBySymbolID := make(map[uint64]uint32, runtimeFuncInfoCount) for i := uintptr(0); i < runtimeFuncInfoCount; i++ { - rec := funcInfoAt(i) - if funcInfoSymbolIDFromRecord(rec) == id { - return uint32(i + 1) + id := funcInfoSymbolIDFromRecord(funcInfoAt(i)) + if id == 0 { + continue } + index := uint32(i + 1) + if prev, ok := indexBySymbolID[id]; ok && prev != index { + indexBySymbolID[id] = 0 + continue + } + indexBySymbolID[id] = index } - return 0 + return indexBySymbolID } func funcInfoSymbolIDFromRecord(rec *runtimeFuncInfoRecord) uint64 { @@ -930,14 +993,46 @@ func funcPCFrameForPC(pc uintptr) (pcSymbol, bool) { return pcSymbol{}, false } frame := runtimeFuncPCFrames[idx] - if frame.funcIndex == 0 || uintptr(frame.funcIndex) > runtimeFuncInfoCount { + return pcSymbolForFuncInfoIndex(pc, frame.entry, frame.funcIndex) +} + +func funcPCFrameForEntryPC(pc uintptr) (pcSymbol, bool) { + if pc == 0 { + return pcSymbol{}, false + } + initRuntimeFuncPCFrames() + frames := runtimeFuncPCFrames + if len(frames) == 0 { return pcSymbol{}, false } - fn := funcInfoAt(uintptr(frame.funcIndex) - 1) + lo, hi := 0, len(frames) + for lo < hi { + mid := int(uint(lo+hi) >> 1) + if frames[mid].entry >= pc { + hi = mid + } else { + lo = mid + 1 + } + } + if lo >= len(frames) { + return pcSymbol{}, false + } + frame := frames[lo] 
+ if frame.entry != pc && frame.entry-pc > runtimeFuncPCEntrySlack { + return pcSymbol{}, false + } + return pcSymbolForFuncInfoIndex(pc, pc, frame.funcIndex) +} + +func pcSymbolForFuncInfoIndex(pc, entry uintptr, funcIndex uint32) (pcSymbol, bool) { + if funcIndex == 0 || uintptr(funcIndex) > runtimeFuncInfoCount { + return pcSymbol{}, false + } + fn := funcInfoAt(uintptr(funcIndex) - 1) line := int(fn.line) return pcSymbol{ pc: pc, - entry: frame.entry, + entry: entry, function: funcInfoFunctionName(fn), file: funcInfoFileName(fn), line: line, From 012095b428b149631128d417a4ce55c198889adc Mon Sep 17 00:00:00 2001 From: Li Jie Date: Wed, 1 Jul 2026 22:01:41 +0800 Subject: [PATCH 19/59] test: cover pcline metadata in dev lto coverage --- ssa/ssa_test.go | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/ssa/ssa_test.go b/ssa/ssa_test.go index 1adb41cf88..3e81d24010 100644 --- a/ssa/ssa_test.go +++ b/ssa/ssa_test.go @@ -205,6 +205,12 @@ func TestFuncInfoMetadataDoesNotPreserveFunctions(t *testing.T) { } func TestPCLineMetadataEmission(t *testing.T) { + testPCLineMetadataEmission(t) +} + +func testPCLineMetadataEmission(t *testing.T) { + t.Helper() + prog := NewProgram(nil) pkg := prog.NewPackage("main", "main") @@ -332,6 +338,11 @@ func TestDevLTOGlobalDCEFuncInfoMetadata(t *testing.T) { testFuncInfoMetadataDoesNotBlockGlobalDCE(t) } +func TestDevLTOGlobalDCEPCLineMetadata(t *testing.T) { + requireGoGlobalDCE(t) + testPCLineMetadataEmission(t) +} + func requireGoGlobalDCE(t *testing.T) { t.Helper() } From 9d9041967020c24d9e228f1ab139139b2acfa8f3 Mon Sep 17 00:00:00 2001 From: Li Jie Date: Wed, 1 Jul 2026 23:21:41 +0800 Subject: [PATCH 20/59] runtime: speed up funcinfo hot paths --- cl/caller_frame_test.go | 47 ++++++++++- cl/compile.go | 4 +- cl/instr.go | 29 ++++++- runtime/internal/lib/runtime/symtab.go | 105 +++++++++++++++++++------ 4 files changed, 154 insertions(+), 31 deletions(-) diff --git a/cl/caller_frame_test.go b/cl/caller_frame_test.go 
index b2a63fe47b..e5c9fd7f0a 100644 --- a/cl/caller_frame_test.go +++ b/cl/caller_frame_test.go @@ -61,10 +61,18 @@ func f() { _ = dbg.Stack() } name: "dot import", src: `package foo import . "runtime" -func f() { _ = FuncForPC(0) } +func f() { Caller(0) } `, want: true, }, + { + name: "runtime FuncForPC only", + src: `package foo +import "runtime" +func f() { _ = runtime.FuncForPC(0) } +`, + want: false, + }, { name: "blank import", src: `package foo @@ -178,7 +186,8 @@ func interfaceCaller(c callerIface) { interfaceDispatch(c) } func closureLayer(next func()) func() { return func() { next() } } func closureCaller() { closureLayer(closureLayer(direct))() } func stack() { _ = debug.Stack() } -func anonOnly() { func() { runtime.FuncForPC(0) }() } +func anonOnly() { func() { runtime.Caller(0) }() } +func funcForPCOnly() { _ = runtime.FuncForPC(0) } func leaf() {} func callFunc(f func()) { f() } func callFuncHot() { callFunc(leaf) } @@ -216,6 +225,9 @@ func plain() {} t.Fatalf("%s should not be tracked when resolved dynamic targets do not reach runtime stack APIs", name) } } + if runtimeCallerFuncs[ssapkg.Func("funcForPCOnly")] { + t.Fatal("FuncForPC-only function should not need caller frame tracking") + } if runtimeCallerFuncs[ssapkg.Func("plain")] { t.Fatal("plain function should not be tracked") } @@ -225,6 +237,12 @@ func plain() {} t.Fatalf("%s should be a runtime caller metadata function", name) } } + if isRuntimeCallerFrameName("FuncForPC") { + t.Fatal("FuncForPC should not require caller frame tracking") + } + if !isRuntimeCallerFrameName("Caller") { + t.Fatal("Caller should require caller frame tracking") + } if isRuntimeCallerName("Version") { t.Fatal("Version should not be a runtime caller metadata function") } @@ -239,6 +257,9 @@ func FuncForPC(pc uintptr) uintptr { return 0 } if !isRuntimeCallerFunc(rtpkg.Func("FuncForPC")) { t.Fatal("LLGo runtime lib FuncForPC should be treated as runtime metadata use") } + if 
isRuntimeCallerFrameFunc(rtpkg.Func("FuncForPC")) { + t.Fatal("FuncForPC should not require caller frame tracking") + } if isRuntimeCallerLookupFunc(rtpkg.Func("FuncForPC")) { t.Fatal("FuncForPC should not consume caller lookup tokens") } @@ -706,6 +727,28 @@ func f() {} if ir := pkg.Module().String(); strings.Contains(ir, "RecordCallerLocation") || strings.Contains(ir, "RecordPanicLocation") { t.Fatalf("packages without runtime stack APIs should not emit caller location tracking:\n%s", ir) } + + ssapkg, files = buildCallerFrameSSAPackage(t, "example.com/foo", `package foo +import "runtime" +func f() { _ = runtime.FuncForPC(0) } +`) + prog = newLLSSAProg(t) + prog.Target().GOOS = "linux" + prog.Target().GOARCH = "amd64" + prog.EnableFuncInfoMetadata(true) + pkg, err = NewPackage(prog, ssapkg, files) + if err != nil { + t.Fatal(err) + } + ir := pkg.Module().String() + for _, bad := range []string{"RecordCallerLocation", "RecordPanicLocation", "PushCallerLocationFrame", `!llgo.pcline`} { + if strings.Contains(ir, bad) { + t.Fatalf("FuncForPC-only packages should not emit caller frame tracking %q:\n%s", bad, ir) + } + } + if !strings.Contains(ir, `!llgo.funcinfo = !{!`) { + t.Fatalf("FuncForPC-only packages should still emit funcinfo metadata:\n%s", ir) + } } func TestCompileRuntimeCallerLocationOnlyForRuntimePaths(t *testing.T) { diff --git a/cl/compile.go b/cl/compile.go index f3d2c21338..ff3f31689f 100644 --- a/cl/compile.go +++ b/cl/compile.go @@ -266,7 +266,7 @@ func filesUseRuntimeCaller(files []*ast.File) bool { return false } case *ast.Ident: - if (dotImports["runtime"] && isRuntimeCallerName(n.Name)) || + if (dotImports["runtime"] && isRuntimeCallerFrameName(n.Name)) || (dotImports["runtime/debug"] && n.Name == "Stack") { found = true return false @@ -284,7 +284,7 @@ func filesUseRuntimeCaller(files []*ast.File) bool { func runtimeCallerSelector(path, name string) bool { switch path { case "runtime": - return isRuntimeCallerName(name) + return 
isRuntimeCallerFrameName(name) case "runtime/debug": return name == "Stack" default: diff --git a/cl/instr.go b/cl/instr.go index 6db43beaea..136b514a47 100644 --- a/cl/instr.go +++ b/cl/instr.go @@ -1040,7 +1040,7 @@ func fnHasDirectRuntimeCaller(fn *ssa.Function) bool { if !ok { continue } - if isRuntimeCallerFunc(call.Common().StaticCallee()) { + if isRuntimeCallerFrameFunc(call.Common().StaticCallee()) { return true } } @@ -1057,7 +1057,7 @@ func (a *runtimeCallerAnalysis) fnMayReachRuntimeCaller(fn *ssa.Function) bool { if fn == nil { return false } - if isRuntimeCallerFunc(fn) { + if isRuntimeCallerFrameFunc(fn) { return true } if !a.funcs[fn] { @@ -1078,7 +1078,7 @@ func (a *runtimeCallerAnalysis) fnMayReachRuntimeCaller(fn *ssa.Function) bool { } callee := call.StaticCallee() switch { - case isRuntimeCallerFunc(callee): + case isRuntimeCallerFrameFunc(callee): reaches = true case callee != nil: reaches = a.fnMayReachRuntimeCaller(callee) @@ -1276,6 +1276,20 @@ func isRuntimeCallerFunc(fn *ssa.Function) bool { } } +func isRuntimeCallerFrameFunc(fn *ssa.Function) bool { + if fn == nil || fn.Pkg == nil || fn.Pkg.Pkg == nil { + return false + } + switch fn.Pkg.Pkg.Path() { + case "runtime", "github.com/goplus/llgo/runtime/internal/lib/runtime": + return isRuntimeCallerFrameName(fn.Name()) + case "runtime/debug": + return fn.Name() == "Stack" + default: + return false + } +} + func isRuntimeCallerLookupFunc(fn *ssa.Function) bool { if fn == nil || fn.Pkg == nil || fn.Pkg.Pkg == nil { return false @@ -1301,6 +1315,15 @@ func isRuntimeCallerName(name string) bool { } } +func isRuntimeCallerFrameName(name string) bool { + switch name { + case "Caller", "Callers", "CallersFrames", "Stack": + return true + default: + return false + } +} + func (p *context) runtimeCallerFrameName() string { if p == nil { return "" diff --git a/runtime/internal/lib/runtime/symtab.go b/runtime/internal/lib/runtime/symtab.go index 098c42fbef..8328262902 100644 --- 
a/runtime/internal/lib/runtime/symtab.go +++ b/runtime/internal/lib/runtime/symtab.go @@ -229,6 +229,7 @@ type runtimePCLineFrame struct { var runtimePCLineInitState uint32 var runtimePCLineFrames []runtimePCLineFrame +var runtimePCLineIndex runtimePCPageIndex type runtimeFuncPCFrame struct { entry uintptr @@ -1132,7 +1133,9 @@ func initRuntimePCLineFramesOnce() { }) } sortRuntimePCLineFrames(frames) - runtimePCLineFrames = uniqueRuntimePCLineFrames(frames) + frames = uniqueRuntimePCLineFrames(frames) + runtimePCLineFrames = frames + runtimePCLineIndex = buildRuntimePCLineIndex(frames) } func pcLineInfoForID(id uint64) *runtimePCLineRecord { @@ -1234,28 +1237,93 @@ func uniqueRuntimePCLineFrames(frames []runtimePCLineFrame) []runtimePCLineFrame return out } -func pcLineFrameForPC(pc, entry uintptr) (pcSymbol, bool) { - if pc == 0 { - return pcSymbol{}, false +func buildRuntimePCLineIndex(frames []runtimePCLineFrame) runtimePCPageIndex { + if len(frames) == 0 { + return runtimePCPageIndex{} } - initRuntimePCLineFrames() + base := frames[0].pc >> runtimeFuncPCPageShift + last := frames[len(frames)-1].pc >> runtimeFuncPCPageShift + if last < base { + return runtimePCPageIndex{} + } + npages := last - base + 2 + if npages > 1<<20 && npages > uintptr(len(frames))*64 { + return runtimePCPageIndex{} + } + pages := make([]uint32, npages) + next := 0 + for page := range pages { + limit := (base + uintptr(page)) << runtimeFuncPCPageShift + for next < len(frames) && frames[next].pc < limit { + next++ + } + pages[page] = uint32(next) + } + return runtimePCPageIndex{base: base, pages: pages} +} + +func runtimePCLineFrameRange(pc uintptr) (int, int) { + frames := runtimePCLineFrames + lo, hi := 0, len(frames) + if pages := runtimePCLineIndex.pages; len(pages) != 0 { + page := pc >> runtimeFuncPCPageShift + if page >= runtimePCLineIndex.base { + off := page - runtimePCLineIndex.base + if off < uintptr(len(pages)) { + lo = int(pages[off]) + if off+1 < uintptr(len(pages)) { + hi = 
int(pages[off+1]) + } + if lo > 0 { + lo-- + } + if hi < len(frames) { + hi++ + } + } + } + } + return lo, hi +} + +func runtimePCLineFrameIndex(pc uintptr, exact bool) int { frames := runtimePCLineFrames if len(frames) == 0 { - return pcSymbol{}, false + return -1 } - lo, hi := 0, len(frames) + lo, hi := runtimePCLineFrameRange(pc) for lo < hi { mid := int(uint(lo+hi) >> 1) - if frames[mid].pc > pc { + if frames[mid].pc > pc || (exact && frames[mid].pc == pc) { hi = mid } else { lo = mid + 1 } } - if lo == 0 { + if exact { + if lo >= len(frames) || frames[lo].pc != pc { + return -1 + } + return lo + } + idx := lo - 1 + if idx < 0 { + return -1 + } + return idx +} + +func pcLineFrameForPC(pc, entry uintptr) (pcSymbol, bool) { + if pc == 0 { + return pcSymbol{}, false + } + initRuntimePCLineFrames() + frames := runtimePCLineFrames + idx := runtimePCLineFrameIndex(pc, false) + if idx < 0 { return pcSymbol{}, false } - frame := frames[lo-1] + frame := frames[idx] if entry != 0 && frame.entry != 0 && frame.entry != entry { return pcSymbol{}, false } @@ -1276,22 +1344,11 @@ func pcLineFrameForExactPC(pc uintptr) (pcSymbol, bool) { } initRuntimePCLineFrames() frames := runtimePCLineFrames - if len(frames) == 0 { - return pcSymbol{}, false - } - lo, hi := 0, len(frames) - for lo < hi { - mid := int(uint(lo+hi) >> 1) - if frames[mid].pc >= pc { - hi = mid - } else { - lo = mid + 1 - } - } - if lo >= len(frames) || frames[lo].pc != pc { + idx := runtimePCLineFrameIndex(pc, true) + if idx < 0 { return pcSymbol{}, false } - frame := frames[lo] + frame := frames[idx] return pcSymbol{ pc: pc, entry: frame.entry, From 1b8bc5683562792c157a4871a7b133817f6504f6 Mon Sep 17 00:00:00 2001 From: Li Jie Date: Thu, 2 Jul 2026 02:20:39 +0800 Subject: [PATCH 21/59] runtime: avoid FuncForPC cache thrashing --- .../lib/runtime/pprof_runtime_stub_llgo.go | 55 ++++++++++--------- 1 file changed, 29 insertions(+), 26 deletions(-) diff --git 
a/runtime/internal/lib/runtime/pprof_runtime_stub_llgo.go b/runtime/internal/lib/runtime/pprof_runtime_stub_llgo.go index 4548771399..efdc55c49c 100644 --- a/runtime/internal/lib/runtime/pprof_runtime_stub_llgo.go +++ b/runtime/internal/lib/runtime/pprof_runtime_stub_llgo.go @@ -2,11 +2,7 @@ package runtime -import ( - "unsafe" - - llrt "github.com/goplus/llgo/runtime/internal/runtime" -) +import llrt "github.com/goplus/llgo/runtime/internal/runtime" type StackRecord struct { Stack []uintptr @@ -88,27 +84,28 @@ func NumGoroutine() int { func SetCPUProfileRate(hz int) {} -const funcForPCCacheSize = 1024 +const funcForPCCacheSets = 1024 +const funcForPCCacheWays = 4 type funcForPCCacheEntry struct { pc uintptr fn *Func } -var funcForPCCache [funcForPCCacheSize]funcForPCCacheEntry +var funcForPCCache [funcForPCCacheSets][funcForPCCacheWays]funcForPCCacheEntry +var funcForPCCacheNext [funcForPCCacheSets]uint8 var funcForPCLast funcForPCCacheEntry func FuncForPC(pc uintptr) *Func { if fn := funcForPCLast.fn; fn != nil && funcForPCLast.pc == pc { return fn } - entry := (*funcForPCCacheEntry)(unsafe.Add( - unsafe.Pointer(&funcForPCCache[0]), - funcForPCCacheIndex(pc)*unsafe.Sizeof(funcForPCCacheEntry{}), - )) - if fn := entry.fn; fn != nil && entry.pc == pc { - funcForPCLast = funcForPCCacheEntry{pc: pc, fn: fn} - return fn + set := &funcForPCCache[funcForPCCacheIndex(pc)] + for i := 0; i < funcForPCCacheWays; i++ { + if fn := set[i].fn; fn != nil && set[i].pc == pc { + funcForPCLast = funcForPCCacheEntry{pc: pc, fn: fn} + return fn + } } return funcForPCSlow(pc) } @@ -124,14 +121,14 @@ func funcForPCSlow(pc uintptr) *Func { // Function-value PCs point at the real function entry. ELF funcinfo // entry-site anchors are emitted from LLVM IR and can land after the // backend prologue, so an exact entry PC may sort before its anchor. - // Prefer native symbol info only when it is an exact entry match; the - // section table below remains the normal fast fallback. 
- if sym := addrInfoSymbol(pc); sym.ok && sym.entry == pc && sym.function != "" { + // Prefer the section table when it can match within the entry slack; + // native symbol lookup is kept only as a fallback. + if sym, ok := funcPCFrameForEntryPC(pc); ok { fn := newFuncForPC(pc, sym) cacheFuncForPC(pc, fn) return fn } - if sym, ok := funcPCFrameForEntryPC(pc); ok { + if sym := addrInfoSymbol(pc); sym.ok && sym.entry == pc && sym.function != "" { fn := newFuncForPC(pc, sym) cacheFuncForPC(pc, fn) return fn @@ -170,15 +167,21 @@ func newFuncForPC(pc uintptr, sym pcSymbol) *Func { } func cacheFuncForPC(pc uintptr, fn *Func) { - entry := (*funcForPCCacheEntry)(unsafe.Add( - unsafe.Pointer(&funcForPCCache[0]), - funcForPCCacheIndex(pc)*unsafe.Sizeof(funcForPCCacheEntry{}), - )) - entry.fn = fn - entry.pc = pc - funcForPCLast = funcForPCCacheEntry{pc: pc, fn: fn} + setIndex := funcForPCCacheIndex(pc) + set := &funcForPCCache[setIndex] + for i := 0; i < funcForPCCacheWays; i++ { + if set[i].fn == nil || set[i].pc == pc { + set[i] = funcForPCCacheEntry{pc: pc, fn: fn} + funcForPCLast = set[i] + return + } + } + way := funcForPCCacheNext[setIndex] & (funcForPCCacheWays - 1) + funcForPCCacheNext[setIndex] = way + 1 + set[way] = funcForPCCacheEntry{pc: pc, fn: fn} + funcForPCLast = set[way] } func funcForPCCacheIndex(pc uintptr) uintptr { - return (pc >> 4) & (funcForPCCacheSize - 1) + return (pc >> 4) & (funcForPCCacheSets - 1) } From cf3335455c79ce2dcd49a17afdf56195728533a7 Mon Sep 17 00:00:00 2001 From: Li Jie Date: Thu, 2 Jul 2026 07:58:04 +0800 Subject: [PATCH 22/59] runtime: use Go-style funcinfo find index --- internal/build/funcinfo_table.go | 7 + internal/build/pclntab.go | 134 ------------- internal/build/pclntab_llvm.go | 38 ++++ internal/build/pclntab_llvm_test.go | 36 ++++ internal/build/pclntab_test.go | 83 --------- internal/pclntab/pclntab.go | 121 ++++++++++++ internal/pclntab/pclntab_test.go | 51 +++++ runtime/internal/lib/runtime/symtab.go | 248 
+++++++++++++++++-------- 8 files changed, 424 insertions(+), 294 deletions(-) delete mode 100644 internal/build/pclntab.go create mode 100644 internal/build/pclntab_llvm.go create mode 100644 internal/build/pclntab_llvm_test.go delete mode 100644 internal/build/pclntab_test.go create mode 100644 internal/pclntab/pclntab.go create mode 100644 internal/pclntab/pclntab_test.go diff --git a/internal/build/funcinfo_table.go b/internal/build/funcinfo_table.go index c7a6d1498c..97b7d8cb96 100644 --- a/internal/build/funcinfo_table.go +++ b/internal/build/funcinfo_table.go @@ -554,6 +554,13 @@ func emitFuncInfoEntrySites(ctx *context, pkg llssa.Package) { if len(symbolIDs) == 0 { return } + // This is LLGo's DCE-safe substitute for the function PC list that Go's + // linker has while building pclntab. The inline-asm fragment lives in an + // associated ELF section tied to the function body, so global DCE removes + // the entry record with the function instead of keeping dead code alive. + // Runtime still sorts these final PCs before building the Go-style + // findfunc bucket index, because LLVM IR generation does not know final + // linked text order. 
llvmCtx := mod.Context() builder := llvmCtx.NewBuilder() defer builder.Dispose() diff --git a/internal/build/pclntab.go b/internal/build/pclntab.go deleted file mode 100644 index 60c2c97416..0000000000 --- a/internal/build/pclntab.go +++ /dev/null @@ -1,134 +0,0 @@ -//go:build !llgo -// +build !llgo - -package build - -import ( - "fmt" - - llvm "github.com/xgo-dev/llvm" -) - -const ( - pclnMinFuncSize = uint32(16) - pclnFuncTabBucketSize = uint32(256) * pclnMinFuncSize - pclnFindFuncSubbucket = 16 -) - -type pclnFuncTabEntry struct { - entryOff uint32 - funcOff uint32 -} - -type pclnFindFuncBucket struct { - idx uint32 - subbuckets [pclnFindFuncSubbucket]uint8 -} - -func buildPCLnFindFuncBuckets(ftab []pclnFuncTabEntry, textSize uint32) ([]pclnFindFuncBucket, error) { - if textSize == 0 { - return nil, nil - } - if len(ftab) < 2 { - return nil, fmt.Errorf("pclntab ftab needs at least one function and one sentinel") - } - for i := 1; i < len(ftab); i++ { - if ftab[i].entryOff <= ftab[i-1].entryOff { - return nil, fmt.Errorf("pclntab ftab entries must be strictly increasing") - } - } - if ftab[0].entryOff != 0 { - return nil, fmt.Errorf("pclntab first entry offset must be zero") - } - if ftab[len(ftab)-1].entryOff < textSize { - return nil, fmt.Errorf("pclntab sentinel offset %d below text size %d", ftab[len(ftab)-1].entryOff, textSize) - } - - nbuckets := int((textSize + pclnFuncTabBucketSize - 1) / pclnFuncTabBucketSize) - buckets := make([]pclnFindFuncBucket, nbuckets) - subSize := pclnFuncTabBucketSize / pclnFindFuncSubbucket - for b := range buckets { - bucketStart := uint32(b) * pclnFuncTabBucketSize - baseIdx := pclnFuncIndexForPC(ftab, bucketStart) - buckets[b].idx = uint32(baseIdx) - for s := 0; s < pclnFindFuncSubbucket; s++ { - pc := bucketStart + uint32(s)*subSize - if pc >= textSize { - pc = textSize - 1 - } - subIdx := pclnFuncIndexForPC(ftab, pc) - delta := subIdx - baseIdx - if delta < 0 || delta > 255 { - return nil, fmt.Errorf("pclntab subbucket 
delta overflow: bucket=%d subbucket=%d delta=%d", b, s, delta) - } - buckets[b].subbuckets[s] = uint8(delta) - } - } - return buckets, nil -} - -func pclnFuncIndexForPC(ftab []pclnFuncTabEntry, pcOff uint32) int { - lo, hi := 0, len(ftab)-1 // last entry is the sentinel. - for lo+1 < hi { - mid := int(uint(lo+hi) >> 1) - if ftab[mid].entryOff <= pcOff { - lo = mid - } else { - hi = mid - } - } - for lo+1 < len(ftab) && ftab[lo+1].entryOff <= pcOff { - lo++ - } - if lo >= len(ftab)-1 { - return len(ftab) - 2 - } - return lo -} - -func pclnLookupFuncIndex(ftab []pclnFuncTabEntry, buckets []pclnFindFuncBucket, pcOff uint32) int { - if len(ftab) < 2 || len(buckets) == 0 { - return -1 - } - bucket := pcOff / pclnFuncTabBucketSize - if bucket >= uint32(len(buckets)) { - return -1 - } - subSize := pclnFuncTabBucketSize / pclnFindFuncSubbucket - sub := (pcOff % pclnFuncTabBucketSize) / subSize - b := buckets[bucket] - idx := int(b.idx) + int(b.subbuckets[sub]) - for idx+1 < len(ftab) && ftab[idx+1].entryOff <= pcOff { - idx++ - } - if idx >= len(ftab)-1 { - return len(ftab) - 2 - } - return idx -} - -func emitPCLnFindFuncBuckets(mod llvm.Module, symbol string, buckets []pclnFindFuncBucket) llvm.Value { - ctx := mod.Context() - i8Type := ctx.Int8Type() - i32Type := ctx.Int32Type() - subType := llvm.ArrayType(i8Type, pclnFindFuncSubbucket) - bucketType := ctx.StructType([]llvm.Type{i32Type, subType}, false) - arrayType := llvm.ArrayType(bucketType, len(buckets)) - values := make([]llvm.Value, 0, len(buckets)) - for _, bucket := range buckets { - subs := make([]llvm.Value, 0, len(bucket.subbuckets)) - for _, sub := range bucket.subbuckets { - subs = append(subs, llvm.ConstInt(i8Type, uint64(sub), false)) - } - values = append(values, llvm.ConstNamedStruct(bucketType, []llvm.Value{ - llvm.ConstInt(i32Type, uint64(bucket.idx), false), - llvm.ConstArray(i8Type, subs), - })) - } - global := llvm.AddGlobal(mod, arrayType, symbol) - global.SetInitializer(llvm.ConstArray(bucketType, 
values)) - global.SetGlobalConstant(true) - global.SetUnnamedAddr(true) - global.SetAlignment(4) - return global -} diff --git a/internal/build/pclntab_llvm.go b/internal/build/pclntab_llvm.go new file mode 100644 index 0000000000..e7cc079ac3 --- /dev/null +++ b/internal/build/pclntab_llvm.go @@ -0,0 +1,38 @@ +//go:build !llgo +// +build !llgo + +package build + +import ( + "github.com/goplus/llgo/internal/pclntab" + llvm "github.com/xgo-dev/llvm" +) + +// emitPCLnFindFuncBuckets is the LLVM materialization layer for the Go-style +// findfunctab data produced by internal/pclntab. Keep the algorithm in that +// package; this function should only translate buckets into IR constants. +func emitPCLnFindFuncBuckets(mod llvm.Module, symbol string, buckets []pclntab.FindFuncBucket) llvm.Value { + ctx := mod.Context() + i8Type := ctx.Int8Type() + i32Type := ctx.Int32Type() + subType := llvm.ArrayType(i8Type, pclntab.FindFuncSubbucket) + bucketType := ctx.StructType([]llvm.Type{i32Type, subType}, false) + arrayType := llvm.ArrayType(bucketType, len(buckets)) + values := make([]llvm.Value, 0, len(buckets)) + for _, bucket := range buckets { + subs := make([]llvm.Value, 0, len(bucket.Subbuckets)) + for _, sub := range bucket.Subbuckets { + subs = append(subs, llvm.ConstInt(i8Type, uint64(sub), false)) + } + values = append(values, llvm.ConstNamedStruct(bucketType, []llvm.Value{ + llvm.ConstInt(i32Type, uint64(bucket.Idx), false), + llvm.ConstArray(i8Type, subs), + })) + } + global := llvm.AddGlobal(mod, arrayType, symbol) + global.SetInitializer(llvm.ConstArray(bucketType, values)) + global.SetGlobalConstant(true) + global.SetUnnamedAddr(true) + global.SetAlignment(4) + return global +} diff --git a/internal/build/pclntab_llvm_test.go b/internal/build/pclntab_llvm_test.go new file mode 100644 index 0000000000..1e74667238 --- /dev/null +++ b/internal/build/pclntab_llvm_test.go @@ -0,0 +1,36 @@ +//go:build !llgo +// +build !llgo + +package build + +import ( + "strings" + 
"testing" + + "github.com/goplus/llgo/internal/pclntab" + llvm "github.com/xgo-dev/llvm" +) + +func TestEmitPCLnFindFuncBuckets(t *testing.T) { + llvm.InitializeAllTargets() + ctx := llvm.NewContext() + defer ctx.Dispose() + mod := ctx.NewModule("pclntab-test") + defer mod.Dispose() + + buckets := []pclntab.FindFuncBucket{ + {Idx: 0, Subbuckets: [16]uint8{0, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2}}, + {Idx: 3, Subbuckets: [16]uint8{0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}}, + } + emitPCLnFindFuncBuckets(mod, "__llgo_findfunctab", buckets) + ir := mod.String() + for _, want := range []string{ + `@__llgo_findfunctab = unnamed_addr constant [2 x { i32, [16 x i8] }]`, + `{ i32 0, [16 x i8] c"\00\01\02`, + `{ i32 3, [16 x i8] c"\00\00\01`, + } { + if !strings.Contains(ir, want) { + t.Fatalf("IR missing %q:\n%s", want, ir) + } + } +} diff --git a/internal/build/pclntab_test.go b/internal/build/pclntab_test.go deleted file mode 100644 index 5eef7ffdfe..0000000000 --- a/internal/build/pclntab_test.go +++ /dev/null @@ -1,83 +0,0 @@ -//go:build !llgo -// +build !llgo - -package build - -import ( - "strings" - "testing" - - llvm "github.com/xgo-dev/llvm" -) - -func TestBuildPCLnFindFuncBucketsLookup(t *testing.T) { - ftab := []pclnFuncTabEntry{ - {entryOff: 0, funcOff: 11}, - {entryOff: 16, funcOff: 22}, - {entryOff: 64, funcOff: 33}, - {entryOff: 4096, funcOff: 44}, - {entryOff: 4352, funcOff: 55}, - {entryOff: 8192, funcOff: 0}, // sentinel - } - buckets, err := buildPCLnFindFuncBuckets(ftab, 8192) - if err != nil { - t.Fatalf("buildPCLnFindFuncBuckets: %v", err) - } - if got, want := len(buckets), 2; got != want { - t.Fatalf("bucket count = %d, want %d", got, want) - } - for _, tt := range []struct { - pc uint32 - want int - }{ - {pc: 0, want: 0}, - {pc: 15, want: 0}, - {pc: 16, want: 1}, - {pc: 63, want: 1}, - {pc: 64, want: 2}, - {pc: 4095, want: 2}, - {pc: 4096, want: 3}, - {pc: 4351, want: 3}, - {pc: 4352, want: 4}, - {pc: 8191, want: 4}, - } { - if got := 
pclnLookupFuncIndex(ftab, buckets, tt.pc); got != tt.want { - t.Fatalf("lookup(%d) = %d, want %d", tt.pc, got, tt.want) - } - } -} - -func TestBuildPCLnFindFuncBucketsRejectsOverflow(t *testing.T) { - ftab := make([]pclnFuncTabEntry, 0, 302) - for i := 0; i < 301; i++ { - ftab = append(ftab, pclnFuncTabEntry{entryOff: uint32(i), funcOff: uint32(i + 1)}) - } - ftab = append(ftab, pclnFuncTabEntry{entryOff: pclnFuncTabBucketSize, funcOff: 0}) - if _, err := buildPCLnFindFuncBuckets(ftab, pclnFuncTabBucketSize); err == nil { - t.Fatal("expected subbucket overflow error") - } -} - -func TestEmitPCLnFindFuncBuckets(t *testing.T) { - llvm.InitializeAllTargets() - ctx := llvm.NewContext() - defer ctx.Dispose() - mod := ctx.NewModule("pclntab-test") - defer mod.Dispose() - - buckets := []pclnFindFuncBucket{ - {idx: 0, subbuckets: [16]uint8{0, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2}}, - {idx: 3, subbuckets: [16]uint8{0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}}, - } - emitPCLnFindFuncBuckets(mod, "__llgo_findfunctab", buckets) - ir := mod.String() - for _, want := range []string{ - `@__llgo_findfunctab = unnamed_addr constant [2 x { i32, [16 x i8] }]`, - `{ i32 0, [16 x i8] c"\00\01\02`, - `{ i32 3, [16 x i8] c"\00\00\01`, - } { - if !strings.Contains(ir, want) { - t.Fatalf("IR missing %q:\n%s", want, ir) - } - } -} diff --git a/internal/pclntab/pclntab.go b/internal/pclntab/pclntab.go new file mode 100644 index 0000000000..9058c14830 --- /dev/null +++ b/internal/pclntab/pclntab.go @@ -0,0 +1,121 @@ +// Package pclntab contains the Go-style findfunc bucket/index algorithm used +// by LLGo runtime metadata. It is intentionally free of LLVM dependencies so +// build-time emitters and tests share one implementation of the pclntab logic. 
+package pclntab + +import "fmt" + +const ( + // These constants intentionally match Go's pclntab findfunc layout: + // cmd/link builds one 4096-byte text bucket, split into 16 256-byte + // subbuckets, and runtime.findfunc starts scanning from the recorded + // bucket base plus subbucket delta. + MinFuncSize = uint32(16) + FuncTabBucketSize = uint32(256) * MinFuncSize + FindFuncSubbucket = 16 +) + +// FuncTabEntry mirrors the two pieces of data Go's linker stores in functab: +// a PC offset sorted by final text address, and an opaque function metadata +// offset. LLGo's current caller uses FuncOff as a payload index. +type FuncTabEntry struct { + EntryOff uint32 + FuncOff uint32 +} + +// FindFuncBucket mirrors runtime.findfuncbucket: one uint32 base function +// index plus 16 one-byte deltas into the sorted functab. +type FindFuncBucket struct { + Idx uint32 + Subbuckets [FindFuncSubbucket]uint8 +} + +// BuildFindFuncBuckets ports Go's cmd/link findfunctab construction for a +// sorted functab. It deliberately stays independent of LLVM so build/link code +// can use it without duplicating the algorithm. 
+func BuildFindFuncBuckets(ftab []FuncTabEntry, textSize uint32) ([]FindFuncBucket, error) { + if textSize == 0 { + return nil, nil + } + if len(ftab) < 2 { + return nil, fmt.Errorf("pclntab ftab needs at least one function and one sentinel") + } + for i := 1; i < len(ftab); i++ { + if ftab[i].EntryOff <= ftab[i-1].EntryOff { + return nil, fmt.Errorf("pclntab ftab entries must be strictly increasing") + } + } + if ftab[0].EntryOff != 0 { + return nil, fmt.Errorf("pclntab first entry offset must be zero") + } + if ftab[len(ftab)-1].EntryOff < textSize { + return nil, fmt.Errorf("pclntab sentinel offset %d below text size %d", ftab[len(ftab)-1].EntryOff, textSize) + } + + nbuckets := int((textSize + FuncTabBucketSize - 1) / FuncTabBucketSize) + buckets := make([]FindFuncBucket, nbuckets) + subSize := FuncTabBucketSize / FindFuncSubbucket + for b := range buckets { + bucketStart := uint32(b) * FuncTabBucketSize + baseIdx := FuncIndexForPC(ftab, bucketStart) + buckets[b].Idx = uint32(baseIdx) + for s := 0; s < FindFuncSubbucket; s++ { + pc := bucketStart + uint32(s)*subSize + if pc >= textSize { + pc = textSize - 1 + } + subIdx := FuncIndexForPC(ftab, pc) + delta := subIdx - baseIdx + if delta < 0 || delta > 255 { + return nil, fmt.Errorf("pclntab subbucket delta overflow: bucket=%d subbucket=%d delta=%d", b, s, delta) + } + buckets[b].Subbuckets[s] = uint8(delta) + } + } + return buckets, nil +} + +// FuncIndexForPC is the slow reference lookup over the sorted functab. It is +// kept for tests and for building the compact bucket table. +func FuncIndexForPC(ftab []FuncTabEntry, pcOff uint32) int { + lo, hi := 0, len(ftab)-1 // last entry is the sentinel. 
+ for lo+1 < hi { + mid := int(uint(lo+hi) >> 1) + if ftab[mid].EntryOff <= pcOff { + lo = mid + } else { + hi = mid + } + } + for lo+1 < len(ftab) && ftab[lo+1].EntryOff <= pcOff { + lo++ + } + if lo >= len(ftab)-1 { + return len(ftab) - 2 + } + return lo +} + +// LookupFuncIndex mirrors runtime.findfunc's hot lookup: use the bucket and +// subbucket to jump near the target function, then linearly scan the remaining +// entries in that small range. +func LookupFuncIndex(ftab []FuncTabEntry, buckets []FindFuncBucket, pcOff uint32) int { + if len(ftab) < 2 || len(buckets) == 0 { + return -1 + } + bucket := pcOff / FuncTabBucketSize + if bucket >= uint32(len(buckets)) { + return -1 + } + subSize := FuncTabBucketSize / FindFuncSubbucket + sub := (pcOff % FuncTabBucketSize) / subSize + b := buckets[bucket] + idx := int(b.Idx) + int(b.Subbuckets[sub]) + for idx+1 < len(ftab) && ftab[idx+1].EntryOff <= pcOff { + idx++ + } + if idx >= len(ftab)-1 { + return len(ftab) - 2 + } + return idx +} diff --git a/internal/pclntab/pclntab_test.go b/internal/pclntab/pclntab_test.go new file mode 100644 index 0000000000..26903ebbea --- /dev/null +++ b/internal/pclntab/pclntab_test.go @@ -0,0 +1,51 @@ +package pclntab + +import "testing" + +func TestBuildFindFuncBucketsLookup(t *testing.T) { + ftab := []FuncTabEntry{ + {EntryOff: 0, FuncOff: 11}, + {EntryOff: 16, FuncOff: 22}, + {EntryOff: 64, FuncOff: 33}, + {EntryOff: 4096, FuncOff: 44}, + {EntryOff: 4352, FuncOff: 55}, + {EntryOff: 8192, FuncOff: 0}, // sentinel + } + buckets, err := BuildFindFuncBuckets(ftab, 8192) + if err != nil { + t.Fatalf("BuildFindFuncBuckets: %v", err) + } + if got, want := len(buckets), 2; got != want { + t.Fatalf("bucket count = %d, want %d", got, want) + } + for _, tt := range []struct { + pc uint32 + want int + }{ + {pc: 0, want: 0}, + {pc: 15, want: 0}, + {pc: 16, want: 1}, + {pc: 63, want: 1}, + {pc: 64, want: 2}, + {pc: 4095, want: 2}, + {pc: 4096, want: 3}, + {pc: 4351, want: 3}, + {pc: 4352, want: 
4}, + {pc: 8191, want: 4}, + } { + if got := LookupFuncIndex(ftab, buckets, tt.pc); got != tt.want { + t.Fatalf("lookup(%d) = %d, want %d", tt.pc, got, tt.want) + } + } +} + +func TestBuildFindFuncBucketsRejectsOverflow(t *testing.T) { + ftab := make([]FuncTabEntry, 0, 302) + for i := 0; i < 301; i++ { + ftab = append(ftab, FuncTabEntry{EntryOff: uint32(i), FuncOff: uint32(i + 1)}) + } + ftab = append(ftab, FuncTabEntry{EntryOff: FuncTabBucketSize, FuncOff: 0}) + if _, err := BuildFindFuncBuckets(ftab, FuncTabBucketSize); err == nil { + t.Fatal("expected subbucket overflow error") + } +} diff --git a/runtime/internal/lib/runtime/symtab.go b/runtime/internal/lib/runtime/symtab.go index 8328262902..49dd77b841 100644 --- a/runtime/internal/lib/runtime/symtab.go +++ b/runtime/internal/lib/runtime/symtab.go @@ -229,25 +229,39 @@ type runtimePCLineFrame struct { var runtimePCLineInitState uint32 var runtimePCLineFrames []runtimePCLineFrame -var runtimePCLineIndex runtimePCPageIndex +var runtimePCLineIndex runtimePCFindIndex type runtimeFuncPCFrame struct { entry uintptr funcIndex uint32 } -type runtimePCPageIndex struct { - base uintptr - pages []uint32 +type runtimePCFindBucket struct { + idx uint32 + subbuckets [runtimePCFindSubbucket]uint8 } -const runtimeFuncPCPageShift = 12 -const runtimeFuncPCEntrySlack = 64 +type runtimePCFindIndex struct { + base uintptr + buckets []runtimePCFindBucket +} + +const ( + // Keep the lookup geometry aligned with Go's pclntab findfunc table: + // 4096-byte buckets, 16 subbuckets, and one-byte function-index deltas. + // LLGo currently builds this compact index at first use after reading + // DCE-safe entry PC sections, because the LLVM IR stage does not yet own + // final text addresses the way cmd/link does for Go. 
+ runtimePCMinFuncSize = uintptr(16) + runtimePCFindBucketSize = uintptr(256) * runtimePCMinFuncSize + runtimePCFindSubbucket = 16 + runtimeFuncPCEntrySlack = 64 +) var runtimeFuncPCInitState uint32 var runtimeFuncPCFrames []runtimeFuncPCFrame var runtimeFuncPCEntries []uintptr -var runtimeFuncPCIndex runtimePCPageIndex +var runtimeFuncPCIndex runtimePCFindIndex const ( runtimeFuncInfoInitUninit uint32 = iota @@ -909,55 +923,120 @@ func uniqueRuntimeFuncPCFrames(frames []runtimeFuncPCFrame) []runtimeFuncPCFrame return out } -func buildRuntimeFuncPCIndex(frames []runtimeFuncPCFrame) runtimePCPageIndex { +// buildRuntimeFuncPCIndex is the runtime counterpart of Go's linker-built +// findfunctab. The table shape and lookup behavior are Go-style; the build time +// differs because LLGo's final function PCs are discovered from associated +// sections after link/load instead of being sorted directly by cmd/link. +func buildRuntimeFuncPCIndex(frames []runtimeFuncPCFrame) runtimePCFindIndex { if len(frames) == 0 { - return runtimePCPageIndex{} + return runtimePCFindIndex{} + } + if uintptr(len(frames)) > ^uintptr(0)>>1 { + return runtimePCFindIndex{} } - base := frames[0].entry >> runtimeFuncPCPageShift - last := frames[len(frames)-1].entry >> runtimeFuncPCPageShift + base := frames[0].entry &^ (runtimePCFindBucketSize - 1) + last := frames[len(frames)-1].entry if last < base { - return runtimePCPageIndex{} + return runtimePCFindIndex{} + } + nbuckets := (last-base)/runtimePCFindBucketSize + 1 + if nbuckets > 1<<20 && nbuckets > uintptr(len(frames))*64 { + return runtimePCFindIndex{} + } + buckets := make([]runtimePCFindBucket, nbuckets) + subSize := runtimePCFindBucketSize / runtimePCFindSubbucket + for b := range buckets { + bucketStart := base + uintptr(b)*runtimePCFindBucketSize + baseIdx := runtimeFuncPCFrameIndexBinary(frames, bucketStart) + if baseIdx < 0 { + baseIdx = 0 + } + if baseIdx > len(frames)-1 { + baseIdx = len(frames) - 1 + } + buckets[b].idx = 
uint32(baseIdx) + for s := 0; s < runtimePCFindSubbucket; s++ { + pc := bucketStart + uintptr(s)*subSize + subIdx := runtimeFuncPCFrameIndexBinary(frames, pc) + if subIdx < 0 { + subIdx = 0 + } + if subIdx > len(frames)-1 { + subIdx = len(frames) - 1 + } + delta := subIdx - baseIdx + if delta < 0 || delta > 255 { + return runtimePCFindIndex{} + } + buckets[b].subbuckets[s] = uint8(delta) + } } - npages := last - base + 2 - if npages > 1<<20 && npages > uintptr(len(frames))*64 { - return runtimePCPageIndex{} + return runtimePCFindIndex{base: base, buckets: buckets} +} + +func runtimePCFindRange(index runtimePCFindIndex, n int, pc uintptr) (int, int, bool) { + if n == 0 || len(index.buckets) == 0 || pc < index.base { + return 0, 0, false } - pages := make([]uint32, npages) - next := 0 - for page := range pages { - limit := (base + uintptr(page)) << runtimeFuncPCPageShift - for next < len(frames) && frames[next].entry < limit { - next++ - } - pages[page] = uint32(next) + off := pc - index.base + bucket := off / runtimePCFindBucketSize + if bucket >= uintptr(len(index.buckets)) { + return 0, 0, false + } + subSize := runtimePCFindBucketSize / runtimePCFindSubbucket + sub := (off % runtimePCFindBucketSize) / subSize + b := index.buckets[bucket] + lo := int(b.idx) + int(b.subbuckets[sub]) + hi := n + if sub+1 < runtimePCFindSubbucket { + hi = int(b.idx) + int(b.subbuckets[sub+1]) + } else if bucket+1 < uintptr(len(index.buckets)) { + hi = int(index.buckets[bucket+1].idx) + } + if lo > 0 { + lo-- + } + if hi < lo { + hi = lo + } + hi += 2 + if hi > n { + hi = n } - return runtimePCPageIndex{base: base, pages: pages} + if lo > n { + lo = n + } + return lo, hi, true } +// runtimeFuncPCFrameIndex mirrors runtime.findfunc: use the compact bucket +// table to jump near the containing function, then scan the sorted frame table +// inside that narrow range. 
func runtimeFuncPCFrameIndex(pc uintptr) int { frames := runtimeFuncPCFrames if len(frames) == 0 { return -1 } - lo, hi := 0, len(frames) - if pages := runtimeFuncPCIndex.pages; len(pages) != 0 { - page := pc >> runtimeFuncPCPageShift - if page >= runtimeFuncPCIndex.base { - off := page - runtimeFuncPCIndex.base - if off < uintptr(len(pages)) { - lo = int(pages[off]) - if off+1 < uintptr(len(pages)) { - hi = int(pages[off+1]) - } - if lo > 0 { - lo-- - } - if hi < len(frames) { - hi++ - } + if lo, hi, ok := runtimePCFindRange(runtimeFuncPCIndex, len(frames), pc); ok { + for lo < hi { + mid := int(uint(lo+hi) >> 1) + if frames[mid].entry > pc { + hi = mid + } else { + lo = mid + 1 } } + idx := lo - 1 + if idx < 0 || frames[idx].entry > pc { + return -1 + } + return idx } + return runtimeFuncPCFrameIndexBinary(frames, pc) +} + +func runtimeFuncPCFrameIndexBinary(frames []runtimeFuncPCFrame, pc uintptr) int { + lo, hi := 0, len(frames) for lo < hi { mid := int(uint(lo+hi) >> 1) if frames[mid].entry > pc { @@ -1237,53 +1316,60 @@ func uniqueRuntimePCLineFrames(frames []runtimePCLineFrame) []runtimePCLineFrame return out } -func buildRuntimePCLineIndex(frames []runtimePCLineFrame) runtimePCPageIndex { +// buildRuntimePCLineIndex reuses the same Go-style bucket geometry for +// statement PC-line sites. Go stores dense per-function pcdata; LLGo keeps +// statement sites as a separate sorted table for now, but the hot PC lookup +// follows the same bucket/subbucket narrowing. 
+func buildRuntimePCLineIndex(frames []runtimePCLineFrame) runtimePCFindIndex { if len(frames) == 0 { - return runtimePCPageIndex{} + return runtimePCFindIndex{} } - base := frames[0].pc >> runtimeFuncPCPageShift - last := frames[len(frames)-1].pc >> runtimeFuncPCPageShift + base := frames[0].pc &^ (runtimePCFindBucketSize - 1) + last := frames[len(frames)-1].pc if last < base { - return runtimePCPageIndex{} - } - npages := last - base + 2 - if npages > 1<<20 && npages > uintptr(len(frames))*64 { - return runtimePCPageIndex{} - } - pages := make([]uint32, npages) - next := 0 - for page := range pages { - limit := (base + uintptr(page)) << runtimeFuncPCPageShift - for next < len(frames) && frames[next].pc < limit { - next++ + return runtimePCFindIndex{} + } + nbuckets := (last-base)/runtimePCFindBucketSize + 1 + if nbuckets > 1<<20 && nbuckets > uintptr(len(frames))*64 { + return runtimePCFindIndex{} + } + buckets := make([]runtimePCFindBucket, nbuckets) + subSize := runtimePCFindBucketSize / runtimePCFindSubbucket + for b := range buckets { + bucketStart := base + uintptr(b)*runtimePCFindBucketSize + baseIdx := runtimePCLineFrameIndexBinary(frames, bucketStart, false) + if baseIdx < 0 { + baseIdx = 0 + } + if baseIdx > len(frames)-1 { + baseIdx = len(frames) - 1 + } + buckets[b].idx = uint32(baseIdx) + for s := 0; s < runtimePCFindSubbucket; s++ { + pc := bucketStart + uintptr(s)*subSize + subIdx := runtimePCLineFrameIndexBinary(frames, pc, false) + if subIdx < 0 { + subIdx = 0 + } + if subIdx > len(frames)-1 { + subIdx = len(frames) - 1 + } + delta := subIdx - baseIdx + if delta < 0 || delta > 255 { + return runtimePCFindIndex{} + } + buckets[b].subbuckets[s] = uint8(delta) } - pages[page] = uint32(next) } - return runtimePCPageIndex{base: base, pages: pages} + return runtimePCFindIndex{base: base, buckets: buckets} } func runtimePCLineFrameRange(pc uintptr) (int, int) { frames := runtimePCLineFrames - lo, hi := 0, len(frames) - if pages := 
runtimePCLineIndex.pages; len(pages) != 0 { - page := pc >> runtimeFuncPCPageShift - if page >= runtimePCLineIndex.base { - off := page - runtimePCLineIndex.base - if off < uintptr(len(pages)) { - lo = int(pages[off]) - if off+1 < uintptr(len(pages)) { - hi = int(pages[off+1]) - } - if lo > 0 { - lo-- - } - if hi < len(frames) { - hi++ - } - } - } + if lo, hi, ok := runtimePCFindRange(runtimePCLineIndex, len(frames), pc); ok { + return lo, hi } - return lo, hi + return 0, len(frames) } func runtimePCLineFrameIndex(pc uintptr, exact bool) int { @@ -1292,6 +1378,14 @@ func runtimePCLineFrameIndex(pc uintptr, exact bool) int { return -1 } lo, hi := runtimePCLineFrameRange(pc) + return runtimePCLineFrameIndexInRange(frames, pc, exact, lo, hi) +} + +func runtimePCLineFrameIndexBinary(frames []runtimePCLineFrame, pc uintptr, exact bool) int { + return runtimePCLineFrameIndexInRange(frames, pc, exact, 0, len(frames)) +} + +func runtimePCLineFrameIndexInRange(frames []runtimePCLineFrame, pc uintptr, exact bool, lo, hi int) int { for lo < hi { mid := int(uint(lo+hi) >> 1) if frames[mid].pc > pc || (exact && frames[mid].pc == pc) { From bb1b1a3fa11aaa14769e3af64de82ecc4a5446b7 Mon Sep 17 00:00:00 2001 From: Li Jie Date: Thu, 2 Jul 2026 09:52:55 +0800 Subject: [PATCH 23/59] runtime: use static funcinfo symbol index --- internal/build/funcinfo_table.go | 79 +++++++++++++++++++++++ internal/build/funcinfo_table_test.go | 7 ++ runtime/internal/lib/runtime/symtab.go | 89 +++++++++++--------------- 3 files changed, 123 insertions(+), 52 deletions(-) diff --git a/internal/build/funcinfo_table.go b/internal/build/funcinfo_table.go index 97b7d8cb96..de89bab7c3 100644 --- a/internal/build/funcinfo_table.go +++ b/internal/build/funcinfo_table.go @@ -34,6 +34,8 @@ const ( funcInfoStringCountSymbol = "__llgo_funcinfo_string_count" funcInfoHashSymbol = "__llgo_funcinfo_hash" funcInfoHashMaskSymbol = "__llgo_funcinfo_hash_mask" + funcInfoSymbolIndexSymbol = "__llgo_funcinfo_symbol_index" + 
funcInfoSymbolIndexCountSymbol = "__llgo_funcinfo_symbol_index_count" funcInfoStubIndexesSymbol = "__llgo_funcinfo_stub_indexes" funcInfoStubCountSymbol = "__llgo_funcinfo_stub_count" funcInfoEntryStartPtrSymbol = "__llgo_funcinfo_entry_start" @@ -55,6 +57,7 @@ const ( funcInfoStringsDataSymbol = "__llgo_funcinfo_strings$data" funcInfoStringOffsetsDataSymbol = "__llgo_funcinfo_string_offsets$data" funcInfoHashDataSymbol = "__llgo_funcinfo_hash$data" + funcInfoSymbolIndexDataSymbol = "__llgo_funcinfo_symbol_index$data" funcInfoStubIndexesDataSymbol = "__llgo_funcinfo_stub_indexes$data" closureStubPrefix = "__llgo_stub." ) @@ -80,6 +83,11 @@ type funcInfoStubRecord struct { funcIndex uint32 } +type funcInfoSymbolIndexRecord struct { + symbolID uint64 + funcIndex uint32 +} + func collectFuncInfo(pkgs []Package) []funcInfoRecord { seen := make(map[string]funcInfoRecord) for _, pkg := range pkgs { @@ -181,6 +189,39 @@ func collectFuncInfoStubRecords(pkgs []Package, records []funcInfoRecord) []func return out } +func collectFuncInfoSymbolIndexRecords(records []funcInfoRecord) []funcInfoSymbolIndexRecord { + if len(records) == 0 { + return nil + } + seen := make(map[uint64]uint32, len(records)) + for i, rec := range records { + if rec.symbol == "" { + continue + } + id := funcInfoSymbolID(rec.symbol) + idx := uint32(i + 1) + if prev, ok := seen[id]; ok && prev != idx { + seen[id] = 0 + continue + } + seen[id] = idx + } + if len(seen) == 0 { + return nil + } + out := make([]funcInfoSymbolIndexRecord, 0, len(seen)) + for id, idx := range seen { + if id == 0 || idx == 0 { + continue + } + out = append(out, funcInfoSymbolIndexRecord{symbolID: id, funcIndex: idx}) + } + sort.Slice(out, func(i, j int) bool { + return out[i].symbolID < out[j].symbolID + }) + return out +} + func prepareFuncInfoTableRecords(records []funcInfoRecord, liveSymbols map[string]none) []funcInfoRecord { if len(records) == 0 { return nil @@ -278,6 +319,10 @@ func emitFuncInfoTable(ctx *context, pkg 
llssa.Package, records []funcInfoRecord i32Type, i32Type, }, false) + symbolIndexRecordType := llvmCtx.StructType([]llvm.Type{ + i64Type, + i32Type, + }, false) funcEntryRecordType := llvmCtx.StructType([]llvm.Type{ llvm.PointerType(i8Type, 0), i64Type, @@ -303,7 +348,9 @@ func emitFuncInfoTable(ctx *context, pkg llssa.Package, records []funcInfoRecord stringOffsetsPtr := llvm.AddGlobal(mod, llvm.PointerType(i32Type, 0), funcInfoStringOffsetsSymbol) stringCount := llvm.AddGlobal(mod, countType, funcInfoStringCountSymbol) hashPtr := llvm.AddGlobal(mod, llvm.PointerType(i16Type, 0), funcInfoHashSymbol) + symbolIndexPtr := llvm.AddGlobal(mod, llvm.PointerType(symbolIndexRecordType, 0), funcInfoSymbolIndexSymbol) count := llvm.AddGlobal(mod, countType, funcInfoCountSymbol) + symbolIndexCount := llvm.AddGlobal(mod, countType, funcInfoSymbolIndexCountSymbol) stubIndexesPtr := llvm.AddGlobal(mod, llvm.PointerType(i32Type, 0), funcInfoStubIndexesSymbol) stubCount := llvm.AddGlobal(mod, countType, funcInfoStubCountSymbol) pcLineCount := llvm.AddGlobal(mod, countType, pcLineCountSymbol) @@ -321,7 +368,9 @@ func emitFuncInfoTable(ctx *context, pkg llssa.Package, records []funcInfoRecord stringOffsetsPtr.SetInitializer(llvm.ConstPointerNull(stringOffsetsPtr.GlobalValueType())) stringCount.SetInitializer(llvm.ConstInt(countType, 0, false)) hashPtr.SetInitializer(llvm.ConstPointerNull(hashPtr.GlobalValueType())) + symbolIndexPtr.SetInitializer(llvm.ConstPointerNull(symbolIndexPtr.GlobalValueType())) count.SetInitializer(llvm.ConstInt(countType, 0, false)) + symbolIndexCount.SetInitializer(llvm.ConstInt(countType, 0, false)) stubIndexesPtr.SetInitializer(llvm.ConstPointerNull(stubIndexesPtr.GlobalValueType())) stubCount.SetInitializer(llvm.ConstInt(countType, 0, false)) pcLineCount.SetInitializer(llvm.ConstInt(countType, 0, false)) @@ -346,7 +395,9 @@ func emitFuncInfoTable(ctx *context, pkg llssa.Package, records []funcInfoRecord 
stringOffsetsPtr.SetInitializer(llvm.ConstPointerNull(stringOffsetsPtr.GlobalValueType())) stringCount.SetInitializer(llvm.ConstInt(countType, 0, false)) hashPtr.SetInitializer(llvm.ConstPointerNull(hashPtr.GlobalValueType())) + symbolIndexPtr.SetInitializer(llvm.ConstPointerNull(symbolIndexPtr.GlobalValueType())) count.SetInitializer(llvm.ConstInt(countType, 0, false)) + symbolIndexCount.SetInitializer(llvm.ConstInt(countType, 0, false)) stubIndexesPtr.SetInitializer(llvm.ConstPointerNull(stubIndexesPtr.GlobalValueType())) stubCount.SetInitializer(llvm.ConstInt(countType, 0, false)) pcLineCount.SetInitializer(llvm.ConstInt(countType, 0, false)) @@ -489,6 +540,34 @@ func emitFuncInfoTable(ctx *context, pkg llssa.Package, records []funcInfoRecord hashMask.SetInitializer(llvm.ConstInt(countType, uint64(len(encoded.Hash)-1), false)) } count.SetInitializer(llvm.ConstInt(countType, uint64(len(encoded.Records)), false)) + symbolIndexRecords := collectFuncInfoSymbolIndexRecords(records) + symbolIndexValues := make([]llvm.Value, 0, len(symbolIndexRecords)) + for _, rec := range symbolIndexRecords { + if rec.funcIndex == 0 || int(rec.funcIndex) > len(encoded.Records) { + continue + } + symbolIndexValues = append(symbolIndexValues, llvm.ConstNamedStruct(symbolIndexRecordType, []llvm.Value{ + llvm.ConstInt(i64Type, rec.symbolID, false), + llvm.ConstInt(i32Type, uint64(rec.funcIndex), false), + })) + } + if len(symbolIndexValues) == 0 { + symbolIndexPtr.SetInitializer(llvm.ConstPointerNull(symbolIndexPtr.GlobalValueType())) + symbolIndexCount.SetInitializer(llvm.ConstInt(countType, 0, false)) + } else { + symbolIndexArrayType := llvm.ArrayType(symbolIndexRecordType, len(symbolIndexValues)) + symbolIndexData := llvm.AddGlobal(mod, symbolIndexArrayType, funcInfoSymbolIndexDataSymbol) + symbolIndexData.SetInitializer(llvm.ConstArray(symbolIndexRecordType, symbolIndexValues)) + symbolIndexData.SetLinkage(llvm.PrivateLinkage) + symbolIndexData.SetGlobalConstant(true) + 
symbolIndexData.SetUnnamedAddr(true) + symbolIndexData.SetAlignment(8) + symbolIndexPtr.SetInitializer(llvm.ConstInBoundsGEP(symbolIndexArrayType, symbolIndexData, []llvm.Value{ + llvm.ConstInt(countType, 0, false), + llvm.ConstInt(countType, 0, false), + })) + symbolIndexCount.SetInitializer(llvm.ConstInt(countType, uint64(len(symbolIndexValues)), false)) + } stubIndexSeen := make(map[uint32]none, len(stubRecords)) stubIndexValues := make([]llvm.Value, 0, len(stubRecords)) for _, stub := range stubRecords { diff --git a/internal/build/funcinfo_table_test.go b/internal/build/funcinfo_table_test.go index 1c7e3cb3aa..adfab2849b 100644 --- a/internal/build/funcinfo_table_test.go +++ b/internal/build/funcinfo_table_test.go @@ -63,7 +63,9 @@ func TestFuncInfoTableMaterializesMetadataWithoutFunctionPointers(t *testing.T) "@__llgo_funcinfo_string_offsets = global ptr", "@__llgo_funcinfo_string_count = global i64 5", "@__llgo_funcinfo_hash = global ptr", + "@__llgo_funcinfo_symbol_index = global ptr", "@__llgo_funcinfo_count = global i64 1", + "@__llgo_funcinfo_symbol_index_count = global i64 1", "@__llgo_funcinfo_entry_start = global ptr @__start_llgo_funcinfo_entry", "@__llgo_funcinfo_entry_end = global ptr @__stop_llgo_funcinfo_entry", "@__llgo_funcinfo_stub_indexes = global ptr null", @@ -74,6 +76,7 @@ func TestFuncInfoTableMaterializesMetadataWithoutFunctionPointers(t *testing.T) `@"__llgo_funcinfo_table$data" = private unnamed_addr constant [1 x { i16, i16, i16, i16, i16, i16, i32 }]`, `@"__llgo_funcinfo_string_offsets$data" = private unnamed_addr constant`, `@"__llgo_funcinfo_hash$data" = private unnamed_addr constant [2 x i16]`, + `@"__llgo_funcinfo_symbol_index$data" = private unnamed_addr constant [1 x { i64, i32 }]`, `example.com/p\00`, `live\00`, `Live\00`, @@ -219,6 +222,8 @@ func TestFuncInfoTableMaterializesClosureStubIndexes(t *testing.T) { for _, want := range []string{ "@__llgo_funcinfo_stub_indexes = global ptr", "@__llgo_funcinfo_stub_count = global i64 
1", + "@__llgo_funcinfo_symbol_index = global ptr", + "@__llgo_funcinfo_symbol_index_count = global i64 2", "@__llgo_funcinfo_stubsite_start = global ptr @__start_llgo_funcinfo_stubsite", "@__llgo_funcinfo_stubsite_end = global ptr @__stop_llgo_funcinfo_stubsite", `@"__llgo_funcinfo_stub_indexes$data" = private unnamed_addr constant [1 x i32]`, @@ -379,7 +384,9 @@ func TestFuncInfoTableEmptyDefinitions(t *testing.T) { "@__llgo_funcinfo_string_offsets = global ptr null", "@__llgo_funcinfo_string_count = global i64 0", "@__llgo_funcinfo_hash = global ptr null", + "@__llgo_funcinfo_symbol_index = global ptr null", "@__llgo_funcinfo_count = global i64 0", + "@__llgo_funcinfo_symbol_index_count = global i64 0", "@__llgo_funcinfo_entry_start = global ptr null", "@__llgo_funcinfo_entry_end = global ptr null", "@__llgo_funcinfo_stub_indexes = global ptr null", diff --git a/runtime/internal/lib/runtime/symtab.go b/runtime/internal/lib/runtime/symtab.go index 49dd77b841..c56d3bba5c 100644 --- a/runtime/internal/lib/runtime/symtab.go +++ b/runtime/internal/lib/runtime/symtab.go @@ -166,6 +166,17 @@ var runtimeFuncInfoCount uintptr //go:linkname runtimeFuncInfoHashMask __llgo_funcinfo_hash_mask var runtimeFuncInfoHashMask uintptr +type runtimeFuncInfoSymbolIndexRecord struct { + symbolID uint64 + funcIndex uint32 +} + +//go:linkname runtimeFuncInfoSymbolIndex __llgo_funcinfo_symbol_index +var runtimeFuncInfoSymbolIndex *runtimeFuncInfoSymbolIndexRecord + +//go:linkname runtimeFuncInfoSymbolIndexCount __llgo_funcinfo_symbol_index_count +var runtimeFuncInfoSymbolIndexCount uintptr + //go:linkname runtimeFuncInfoStubIndexes __llgo_funcinfo_stub_indexes var runtimeFuncInfoStubIndexes *uint32 @@ -681,11 +692,7 @@ func initRuntimeFuncPCFramesOnce() { } frames := make([]runtimeFuncPCFrame, 0, runtimeFuncInfoCount) entries := make([]uintptr, runtimeFuncInfoCount+1) - var indexBySymbolID map[uint64]uint32 - if runtimeFuncInfoEntryStart != nil || runtimeFuncInfoStubSiteStart != nil { - 
indexBySymbolID = funcInfoIndexBySymbolID() - } - frames, usedEntrySites := appendRuntimeFuncInfoEntryFrames(frames, entries, indexBySymbolID) + frames, usedEntrySites := appendRuntimeFuncInfoEntryFrames(frames, entries) symbolBuf := []byte(nil) if !usedEntrySites { symbolBuf = make([]byte, 0, maxFuncInfoSymbolLen()+len(runtimeClosureStubPrefix)+1) @@ -705,7 +712,7 @@ func initRuntimeFuncPCFramesOnce() { } } } - frames = appendRuntimeFuncInfoStubSiteFrames(frames, indexBySymbolID) + frames = appendRuntimeFuncInfoStubSiteFrames(frames) // Closure stubs are an ABI adapter and may go away in a future closure // lowering. Keep the fallback compatibility table light: it stores only // target funcinfo record indexes. On ELF we prefer the associated stub-site @@ -737,7 +744,7 @@ func initRuntimeFuncPCFramesOnce() { runtimeFuncPCIndex = buildRuntimeFuncPCIndex(frames) } -func appendRuntimeFuncInfoEntryFrames(frames []runtimeFuncPCFrame, entries []uintptr, indexBySymbolID map[uint64]uint32) ([]runtimeFuncPCFrame, bool) { +func appendRuntimeFuncInfoEntryFrames(frames []runtimeFuncPCFrame, entries []uintptr) ([]runtimeFuncPCFrame, bool) { if runtimeFuncInfoEntryStart == nil || runtimeFuncInfoEntryEnd == nil { return frames, false } @@ -757,7 +764,7 @@ func appendRuntimeFuncInfoEntryFrames(frames []runtimeFuncPCFrame, entries []uin if site == nil || site.pc == 0 || site.symbolID == 0 { continue } - funcIndex := indexBySymbolID[site.symbolID] + funcIndex := funcInfoIndexForSymbolID(site.symbolID) if funcIndex == 0 || uintptr(funcIndex) > runtimeFuncInfoCount { continue } @@ -773,7 +780,7 @@ func appendRuntimeFuncInfoEntryFrames(frames []runtimeFuncPCFrame, entries []uin return frames, used } -func appendRuntimeFuncInfoStubSiteFrames(frames []runtimeFuncPCFrame, indexBySymbolID map[uint64]uint32) []runtimeFuncPCFrame { +func appendRuntimeFuncInfoStubSiteFrames(frames []runtimeFuncPCFrame) []runtimeFuncPCFrame { if runtimeFuncInfoStubSiteStart == nil || runtimeFuncInfoStubSiteEnd 
== nil { return frames } @@ -792,7 +799,7 @@ func appendRuntimeFuncInfoStubSiteFrames(frames []runtimeFuncPCFrame, indexBySym if site == nil || site.pc == 0 || site.symbolID == 0 { continue } - funcIndex := indexBySymbolID[site.symbolID] + funcIndex := funcInfoIndexForSymbolID(site.symbolID) if funcIndex == 0 || uintptr(funcIndex) > runtimeFuncInfoCount { continue } @@ -804,54 +811,32 @@ func appendRuntimeFuncInfoStubSiteFrames(frames []runtimeFuncPCFrame, indexBySym return frames } -func funcInfoIndexBySymbolID() map[uint64]uint32 { - indexBySymbolID := make(map[uint64]uint32, runtimeFuncInfoCount) - for i := uintptr(0); i < runtimeFuncInfoCount; i++ { - id := funcInfoSymbolIDFromRecord(funcInfoAt(i)) - if id == 0 { - continue - } - index := uint32(i + 1) - if prev, ok := indexBySymbolID[id]; ok && prev != index { - indexBySymbolID[id] = 0 - continue - } - indexBySymbolID[id] = index +func funcInfoIndexForSymbolID(symbolID uint64) uint32 { + if symbolID == 0 || runtimeFuncInfoSymbolIndex == nil || runtimeFuncInfoSymbolIndexCount == 0 { + return 0 } - return indexBySymbolID -} - -func funcInfoSymbolIDFromRecord(rec *runtimeFuncInfoRecord) uint64 { - const ( - offset = uint64(14695981039346656037) - prime = uint64(1099511628211) - ) - if rec == nil { + if runtimeFuncInfoSymbolIndexCount > runtimeFuncInfoCount || runtimeFuncInfoSymbolIndexCount > 1<<20 { return 0 } - h := offset - h = funcInfoHashCString(h, funcInfoCString(rec.symbolPkg)) - pkgLen := cStringLen(funcInfoCString(rec.symbolPkg)) - name := funcInfoCString(rec.symbolName) - if pkgLen != 0 && cStringLen(name) != 0 { - h ^= uint64('.') - h *= prime + lo, hi := uintptr(0), runtimeFuncInfoSymbolIndexCount + size := unsafe.Sizeof(*runtimeFuncInfoSymbolIndex) + for lo < hi { + mid := (lo + hi) >> 1 + rec := (*runtimeFuncInfoSymbolIndexRecord)(unsafe.Add(unsafe.Pointer(runtimeFuncInfoSymbolIndex), mid*size)) + if rec.symbolID >= symbolID { + hi = mid + } else { + lo = mid + 1 + } } - h = funcInfoHashCString(h, 
name) - if h == 0 { - return 1 + if lo >= runtimeFuncInfoSymbolIndexCount { + return 0 } - return h -} - -func funcInfoHashCString(h uint64, s *c.Char) uint64 { - const prime = uint64(1099511628211) - for s != nil && *s != 0 { - h ^= uint64(byte(*s)) - h *= prime - s = (*c.Char)(unsafe.Add(unsafe.Pointer(s), 1)) + rec := (*runtimeFuncInfoSymbolIndexRecord)(unsafe.Add(unsafe.Pointer(runtimeFuncInfoSymbolIndex), lo*size)) + if rec.symbolID != symbolID || rec.funcIndex == 0 || uintptr(rec.funcIndex) > runtimeFuncInfoCount { + return 0 } - return h + return rec.funcIndex } func sortRuntimeFuncPCFrames(frames []runtimeFuncPCFrame) { From e7cffef597eb61a3c8536ed6f4360aca95669f6a Mon Sep 17 00:00:00 2001 From: Li Jie Date: Thu, 2 Jul 2026 15:33:02 +0800 Subject: [PATCH 24/59] benchmark: add runtime funcinfo benchmark harness Bring over the cross-branch runtime funcinfo benchmark (hot, deep, multipkg, cold, stdlib scenarios) so #2012 can reproduce its own performance numbers. cold.FirstCallersFrames now walks to the first fully symbolized frame, because synthetic runtime frames (LLGo's runtime.Callers placeholder) carry no file/line and the metric was silently skipped on LLGo. 
Co-Authored-By: Claude Fable 5 --- benchmark/runtime_funcinfo/.gitignore | 2 + benchmark/runtime_funcinfo/README.md | 59 ++ benchmark/runtime_funcinfo/main.go | 1317 +++++++++++++++++++++++++ 3 files changed, 1378 insertions(+) create mode 100644 benchmark/runtime_funcinfo/.gitignore create mode 100644 benchmark/runtime_funcinfo/README.md create mode 100644 benchmark/runtime_funcinfo/main.go diff --git a/benchmark/runtime_funcinfo/.gitignore b/benchmark/runtime_funcinfo/.gitignore new file mode 100644 index 0000000000..d4632d51c8 --- /dev/null +++ b/benchmark/runtime_funcinfo/.gitignore @@ -0,0 +1,2 @@ +out/ +out-* diff --git a/benchmark/runtime_funcinfo/README.md b/benchmark/runtime_funcinfo/README.md new file mode 100644 index 0000000000..d80a73bc13 --- /dev/null +++ b/benchmark/runtime_funcinfo/README.md @@ -0,0 +1,59 @@ +# Runtime Funcinfo Benchmark + +This benchmark keeps runtime funcinfo measurements comparable across branches by +generating the same probe programs and rebuilding them with each compiler/root +pair in one run. + +It covers: + +- hot runtime metadata calls: `Caller`, `Callers`, `CallersFrames`, + `FuncForPC`, and `Func.FileLine`. +- deep stacks through direct calls, interface calls, and closures. +- many packages and methods, generated from configurable package/method counts. +- cold first-use runtime metadata paths, including lazy table initialization. +- a stdlib-heavy program with `encoding/json`, `text/template`, `regexp`, + `go/parser`, `go/token`, and `net/netip` imports. + +Generated modules use `example.com/llgo-bench/...` import paths. This is +intentional: LLGo does not enable caller-frame tracking for stdlib-shaped paths +without a dot, and that would benchmark the fallback path instead of normal +third-party package behavior. 
+ +Example: + +```sh +go run ./benchmark/runtime_funcinfo \ + -runs=11 \ + -llgo-opt=2 \ + -variant go=go \ + -variant main=llgo,/path/to/llgo-main,/path/to/llgo-main-root \ + -variant 2002=llgo,/path/to/llgo-2002,/path/to/llgo-2002-root \ + -variant 2009=llgo,/path/to/llgo-2009,/path/to/llgo-2009-root \ + -variant 2010=llgo,/path/to/llgo-2010,/path/to/llgo-2010-root +``` + +Add `-include-lto` to build an additional `+lto` variant for every LLGo +compiler. LLGo builds use `-O2` by default; pass `-llgo-opt=` to omit the +optimization flag. Add `-scales=6x6,12x12,24x24` to generate separate +`multipkg_*` and `cold_*` scenarios for several package/function counts in one +run. Output is written to `benchmark/runtime_funcinfo/out` by default: + +- `summary.md`: markdown performance, binary size, and build time tables. +- `results.json`: raw build and run data. +- `work/`: generated probe modules. +- `bin/`: generated executables. + +Performance cells are `best/trimmed avg` from process-level runs. The trimmed +average drops one minimum and one maximum when at least three runs are present. +`-iters` is a base iteration count: `hot` uses the full count, `deep` uses a +quarter, and `multipkg`/`cold`/`stdlib` use one twentieth because each operation +does substantially more work. + +`multipkg.FuncForPCMany` and `multipkg.FileLineMany` are batch metrics over all +generated target functions (`-packages * -methods`, 144 targets with the default +settings), not single-lookup timings. + +`cold.First*` metrics are single measurements from a fresh process and include +lazy runtime initialization that has not already happened in that process. +`cold.WarmFuncForPCMany` and `cold.WarmFileLineMany` use the same batch target +count as `multipkg`. 
diff --git a/benchmark/runtime_funcinfo/main.go b/benchmark/runtime_funcinfo/main.go new file mode 100644 index 0000000000..c23cb75fd0 --- /dev/null +++ b/benchmark/runtime_funcinfo/main.go @@ -0,0 +1,1317 @@ +package main + +import ( + "encoding/json" + "errors" + "flag" + "fmt" + "math" + "os" + "os/exec" + "path/filepath" + "sort" + "strconv" + "strings" + "time" +) + +type variantFlag []string + +func (v *variantFlag) String() string { + return strings.Join(*v, ";") +} + +func (v *variantFlag) Set(s string) error { + *v = append(*v, s) + return nil +} + +type variant struct { + Name string `json:"name"` + Kind string `json:"kind"` + Tool string `json:"tool"` + Root string `json:"root,omitempty"` + LTO bool `json:"lto,omitempty"` +} + +type scenario struct { + Name string `json:"name"` + Kind string `json:"kind"` + Dir string `json:"-"` + PackageCount int `json:"package_count,omitempty"` + MethodCount int `json:"method_count,omitempty"` + TargetCount int `json:"target_count,omitempty"` + Scale scenarioSize `json:"scale,omitempty"` +} + +type scenarioSize struct { + Packages int `json:"packages"` + Methods int `json:"methods"` +} + +type buildResult struct { + Variant string `json:"variant"` + Scenario string `json:"scenario"` + Binary string `json:"binary"` + Size int64 `json:"size_bytes"` + BuildMS int64 `json:"build_ms"` + Error string `json:"error,omitempty"` +} + +type runResult struct { + Variant string `json:"variant"` + Scenario string `json:"scenario"` + Metrics map[string][]int64 `json:"metrics_ns"` + Error string `json:"error,omitempty"` + Output string `json:"output,omitempty"` + Env map[string]string `json:"env,omitempty"` +} + +type resultFile struct { + GeneratedAt time.Time `json:"generated_at"` + PackageCount int `json:"package_count"` + MethodCount int `json:"method_count"` + Variants []variant `json:"variants"` + Scenarios []string `json:"scenarios"` + ScenarioMeta []scenario `json:"scenario_meta,omitempty"` + Builds []buildResult 
`json:"builds"` + Runs []runResult `json:"runs"` +} + +func main() { + var variants variantFlag + outDir := flag.String("out", filepath.Join("benchmark", "runtime_funcinfo", "out"), "output directory") + runs := flag.Int("runs", 11, "process runs per executable") + iters := flag.Int("iters", 200000, "inner benchmark iterations") + llgoOpt := flag.String("llgo-opt", "2", "LLGo optimization level passed as -O; empty disables the flag") + scenarioList := flag.String("scenarios", "hot,deep,multipkg,cold,stdlib", "comma-separated scenarios") + includeLTO := flag.Bool("include-lto", false, "also build full-LTO variants for LLGo compilers") + pkgCount := flag.Int("packages", 12, "generated package count for multipkg") + methodCount := flag.Int("methods", 12, "generated functions and methods per generated package") + scaleList := flag.String("scales", "", "optional comma-separated package x method scales for multipkg/cold, for example 6x6,12x12,24x24") + flag.Var(&variants, "variant", "variant definition: name=go or name=llgo,/path/to/llgo,/path/to/root") + flag.Parse() + + if len(variants) == 0 { + variants = append(variants, "go=go") + } + parsed, err := parseVariants(variants, *includeLTO) + if err != nil { + fatal(err) + } + if *runs <= 0 { + fatal(errors.New("-runs must be positive")) + } + if *iters <= 0 { + fatal(errors.New("-iters must be positive")) + } + if *pkgCount <= 0 || *methodCount <= 0 { + fatal(errors.New("-packages and -methods must be positive")) + } + scales, err := parseScales(*scaleList) + if err != nil { + fatal(err) + } + + absOut, err := filepath.Abs(*outDir) + if err != nil { + fatal(err) + } + if err := os.RemoveAll(absOut); err != nil { + fatal(err) + } + for _, dir := range []string{"work", "bin"} { + if err := os.MkdirAll(filepath.Join(absOut, dir), 0755); err != nil { + fatal(err) + } + } + + scenarios, err := generateScenarios(filepath.Join(absOut, "work"), splitList(*scenarioList), *pkgCount, *methodCount, scales) + if err != nil { + 
fatal(err) + } + + var builds []buildResult + var runsOut []runResult + for _, sc := range scenarios { + for _, v := range parsed { + br := buildScenario(absOut, sc, v, *llgoOpt) + builds = append(builds, br) + if br.Error != "" { + fmt.Fprintf(os.Stderr, "build failed: %s/%s: %s\n", v.Name, sc.Name, br.Error) + continue + } + rr := runScenario(sc, v, br.Binary, *runs, *iters) + runsOut = append(runsOut, rr) + if rr.Error != "" { + fmt.Fprintf(os.Stderr, "run failed: %s/%s: %s\n", v.Name, sc.Name, rr.Error) + } + } + } + + result := resultFile{ + GeneratedAt: time.Now(), + PackageCount: *pkgCount, + MethodCount: *methodCount, + Variants: parsed, + Scenarios: scenarioNames(scenarios), + ScenarioMeta: scenarios, + Builds: builds, + Runs: runsOut, + } + if err := writeJSON(filepath.Join(absOut, "results.json"), result); err != nil { + fatal(err) + } + summary := renderSummary(result) + if err := os.WriteFile(filepath.Join(absOut, "summary.md"), []byte(summary), 0644); err != nil { + fatal(err) + } + fmt.Print(summary) +} + +func parseVariants(values []string, includeLTO bool) ([]variant, error) { + var out []variant + seen := map[string]bool{} + for _, raw := range values { + name, spec, ok := strings.Cut(raw, "=") + if !ok || name == "" || spec == "" { + return nil, fmt.Errorf("bad -variant %q", raw) + } + if seen[name] { + return nil, fmt.Errorf("duplicate variant %q", name) + } + seen[name] = true + var v variant + v.Name = name + switch { + case spec == "go": + v.Kind = "go" + v.Tool = "go" + case strings.HasPrefix(spec, "go,"): + parts := strings.Split(spec, ",") + if len(parts) != 2 || parts[1] == "" { + return nil, fmt.Errorf("bad go variant %q", raw) + } + v.Kind = "go" + v.Tool = parts[1] + case strings.HasPrefix(spec, "llgo,"): + parts := strings.Split(spec, ",") + if len(parts) != 3 || parts[1] == "" || parts[2] == "" { + return nil, fmt.Errorf("bad llgo variant %q", raw) + } + v.Kind = "llgo" + v.Tool = parts[1] + v.Root = parts[2] + default: + return nil, 
fmt.Errorf("unknown variant kind in %q", raw) + } + out = append(out, v) + if includeLTO && v.Kind == "llgo" { + lto := v + lto.Name = v.Name + "+lto" + lto.LTO = true + out = append(out, lto) + } + } + return out, nil +} + +func generateScenarios(workDir string, names []string, pkgCount, methodCount int, scales []scenarioSize) ([]scenario, error) { + var out []scenario + for _, name := range names { + sizes := []scenarioSize{{Packages: pkgCount, Methods: methodCount}} + if len(scales) != 0 && (name == "multipkg" || name == "cold") { + sizes = scales + } + for _, size := range sizes { + scenarioName := name + if len(sizes) > 1 { + scenarioName = fmt.Sprintf("%s_%dx%d", name, size.Packages, size.Methods) + } + dir := filepath.Join(workDir, scenarioName) + if err := os.MkdirAll(dir, 0755); err != nil { + return nil, err + } + var err error + switch name { + case "hot": + err = generateHot(dir) + case "deep": + err = generateDeep(dir) + case "multipkg": + err = generateMultipkg(dir, size.Packages, size.Methods) + case "cold": + err = generateCold(dir, size.Packages, size.Methods) + case "stdlib": + err = generateStdlib(dir) + default: + return nil, fmt.Errorf("unknown scenario %q", name) + } + if err != nil { + return nil, err + } + sc := scenario{Name: scenarioName, Kind: name, Dir: dir} + if name == "multipkg" || name == "cold" { + sc.PackageCount = size.Packages + sc.MethodCount = size.Methods + sc.TargetCount = size.Packages * size.Methods + sc.Scale = size + } + out = append(out, sc) + } + } + return out, nil +} + +func writeModule(dir, module string) error { + return os.WriteFile(filepath.Join(dir, "go.mod"), []byte("module "+module+"\n\ngo 1.24\n"), 0644) +} + +func generateHot(dir string) error { + if err := writeModule(dir, "example.com/llgo-bench/hot"); err != nil { + return err + } + return os.WriteFile(filepath.Join(dir, "main.go"), []byte(hotSource), 0644) +} + +func generateDeep(dir string) error { + if err := writeModule(dir, 
"example.com/llgo-bench/deep"); err != nil { + return err + } + var b strings.Builder + b.WriteString(deepPrefix) + for i := 0; i < 32; i++ { + fmt.Fprintf(&b, "//go:noinline\nfunc frame%d() { frame%d() }\n\n", i, i+1) + } + b.WriteString(`//go:noinline +func frame32() { + pc, file, line, ok := runtime.Caller(16) + if !ok || pc == 0 || file == "" || line == 0 { + panic("bad deep caller") + } + sinkPC = pc + sinkString = file + sinkInt += line +} + +`) + b.WriteString(deepSuffix) + return os.WriteFile(filepath.Join(dir, "main.go"), []byte(b.String()), 0644) +} + +func generateMultipkg(dir string, pkgCount, methodCount int) error { + if err := writeModule(dir, "example.com/llgo-bench/multipkg"); err != nil { + return err + } + for i := 0; i < pkgCount; i++ { + pkgName := fmt.Sprintf("p%02d", i) + pkgDir := filepath.Join(dir, pkgName) + if err := os.MkdirAll(pkgDir, 0755); err != nil { + return err + } + var b strings.Builder + fmt.Fprintf(&b, "package %s\n\n", pkgName) + b.WriteString("import (\n\t\"reflect\"\n\t\"runtime\"\n") + if i+1 < pkgCount { + fmt.Fprintf(&b, "\tnext \"example.com/llgo-bench/multipkg/p%02d\"\n", i+1) + } + b.WriteString(")\n\n") + fmt.Fprintf(&b, "type T%02d struct { V int }\n", i) + b.WriteString("type Worker interface { M00(int) int }\n\n") + for j := 0; j < methodCount; j++ { + fmt.Fprintf(&b, "//go:noinline\nfunc F%02d_%02d(x int) int { return x + %d }\n\n", i, j, i*100+j) + fmt.Fprintf(&b, "//go:noinline\nfunc (t T%02d) M%02d(x int) int { return t.V + x + %d }\n\n", i, j, j) + } + b.WriteString("//go:noinline\nfunc Targets() []uintptr {\n\treturn []uintptr{\n") + for j := 0; j < methodCount; j++ { + fmt.Fprintf(&b, "\t\treflect.ValueOf(F%02d_%02d).Pointer(),\n", i, j) + } + b.WriteString("\t}\n}\n\n") + b.WriteString("//go:noinline\nfunc Run(x int) int {\n") + b.WriteString("\tpc, _, line, ok := runtime.Caller(0)\n\tif !ok || pc == 0 || line == 0 { panic(\"bad caller\") }\n") + fmt.Fprintf(&b, "\tvar w Worker = T%02d{V: x}\n", i) + 
b.WriteString("\ttotal := w.M00(x)\n") + for j := 0; j < methodCount; j++ { + fmt.Fprintf(&b, "\ttotal += F%02d_%02d(x)\n", i, j) + fmt.Fprintf(&b, "\ttotal += (T%02d{V: total}).M%02d(x)\n", i, j) + } + if i+1 < pkgCount { + b.WriteString("\ttotal += next.Run(x+1)\n") + } + b.WriteString("\treturn total + line\n}\n") + if err := os.WriteFile(filepath.Join(pkgDir, pkgName+".go"), []byte(b.String()), 0644); err != nil { + return err + } + } + + var main strings.Builder + main.WriteString("package main\n\nimport (\n\t\"fmt\"\n\t\"os\"\n\t\"runtime\"\n\t\"time\"\n") + for i := 0; i < pkgCount; i++ { + fmt.Fprintf(&main, "\tp%02d \"example.com/llgo-bench/multipkg/p%02d\"\n", i, i) + } + main.WriteString(")\n\nvar sinkInt int\nvar sinkString string\n\n") + main.WriteString(commonBenchHelpers) + main.WriteString("func main() {\n\titers := benchIters(10000)\n\tvar targets []uintptr\n") + for i := 0; i < pkgCount; i++ { + fmt.Fprintf(&main, "\ttargets = append(targets, p%02d.Targets()...)\n", i) + } + main.WriteString(` + if funcInfoReady(targets) { + measure("multipkg.FuncForPCMany", iters, func() { + total := 0 + for _, pc := range targets { + fn := runtime.FuncForPC(pc) + if fn == nil { + panic("missing func") + } + total += len(fn.Name()) + } + sinkInt += total + }) + measure("multipkg.FileLineMany", iters, func() { + total := 0 + for _, pc := range targets { + fn := runtime.FuncForPC(pc) + if fn == nil { + panic("missing func") + } + file, line := fn.FileLine(pc) + if file == "" || line == 0 { + panic("missing fileline") + } + total += line + len(file) + } + sinkInt += total + }) + } + measure("multipkg.DeepRun", iters, func() { + sinkInt += p00.Run(1) + }) + fmt.Println("sink=", sinkInt, sinkString) +} + +func funcInfoReady(targets []uintptr) bool { + for _, pc := range targets { + fn := runtime.FuncForPC(pc) + if fn == nil { + return false + } + if file, line := fn.FileLine(pc); file == "" || line == 0 { + return false + } + } + return len(targets) != 0 +} +`) + 
return os.WriteFile(filepath.Join(dir, "main.go"), []byte(main.String()), 0644) +} + +func generateCold(dir string, pkgCount, methodCount int) error { + if err := writeModule(dir, "example.com/llgo-bench/cold"); err != nil { + return err + } + for i := 0; i < pkgCount; i++ { + pkgName := fmt.Sprintf("p%02d", i) + pkgDir := filepath.Join(dir, pkgName) + if err := os.MkdirAll(pkgDir, 0755); err != nil { + return err + } + var b strings.Builder + fmt.Fprintf(&b, "package %s\n\n", pkgName) + b.WriteString("import \"reflect\"\n\n") + for j := 0; j < methodCount; j++ { + fmt.Fprintf(&b, "//go:noinline\nfunc F%02d_%02d(x int) int { return x + %d }\n\n", i, j, i*100+j) + } + b.WriteString("//go:noinline\nfunc Targets() []uintptr {\n\treturn []uintptr{\n") + for j := 0; j < methodCount; j++ { + fmt.Fprintf(&b, "\t\treflect.ValueOf(F%02d_%02d).Pointer(),\n", i, j) + } + b.WriteString("\t}\n}\n") + if err := os.WriteFile(filepath.Join(pkgDir, pkgName+".go"), []byte(b.String()), 0644); err != nil { + return err + } + } + + var main strings.Builder + main.WriteString("package main\n\nimport (\n\t\"fmt\"\n\t\"os\"\n\t\"runtime\"\n\t\"time\"\n") + for i := 0; i < pkgCount; i++ { + fmt.Fprintf(&main, "\tp%02d \"example.com/llgo-bench/cold/p%02d\"\n", i, i) + } + main.WriteString(")\n\nvar sinkInt int\nvar sinkString string\n\n") + main.WriteString(commonBenchHelpers) + main.WriteString("func main() {\n\titers := benchIters(10000)\n\tvar targets []uintptr\n") + for i := 0; i < pkgCount; i++ { + fmt.Fprintf(&main, "\ttargets = append(targets, p%02d.Targets()...)\n", i) + } + main.WriteString(` + if len(targets) == 0 { + panic("missing targets") + } + first := targets[len(targets)/2] + start := time.Now() + fn := runtime.FuncForPC(first) + if fn == nil || fn.Name() == "" { + panic("missing first func") + } + fmt.Printf("cold.FirstFuncForPC=%d\n", time.Since(start).Nanoseconds()) + sinkString = fn.Name() + + start = time.Now() + file, line := fn.FileLine(first) + if file == "" || line 
== 0 { + panic("missing first fileline") + } + fmt.Printf("cold.FirstFileLine=%d\n", time.Since(start).Nanoseconds()) + sinkString = file + sinkInt += line + + start = time.Now() + pc, file, line, ok := runtime.Caller(0) + if !ok || pc == 0 || file == "" || line == 0 { + panic("bad first caller") + } + fmt.Printf("cold.FirstCaller0=%d\n", time.Since(start).Nanoseconds()) + sinkString = file + sinkInt += line + + start = time.Now() + var pcs [16]uintptr + n := runtime.Callers(0, pcs[:]) + frames := runtime.CallersFrames(pcs[:n]) + // Walk to the first fully symbolized frame: synthetic runtime frames + // (e.g. LLGo's runtime.Callers placeholder) carry no file/line. + for { + frame, more := frames.Next() + if frame.Function != "" && frame.File != "" && frame.Line != 0 { + fmt.Printf("cold.FirstCallersFrames=%d\n", time.Since(start).Nanoseconds()) + sinkString = frame.Function + sinkInt += frame.Line + break + } + if !more { + break + } + } + + measure("cold.WarmFuncForPCMany", iters, func() { + total := 0 + for _, pc := range targets { + fn := runtime.FuncForPC(pc) + if fn == nil { + panic("missing func") + } + total += len(fn.Name()) + } + sinkInt += total + }) + measure("cold.WarmFileLineMany", iters, func() { + total := 0 + for _, pc := range targets { + fn := runtime.FuncForPC(pc) + if fn == nil { + panic("missing func") + } + file, line := fn.FileLine(pc) + if file == "" || line == 0 { + panic("missing fileline") + } + total += len(file) + line + } + sinkInt += total + }) + fmt.Println("sink=", sinkInt, sinkString) +} +`) + return os.WriteFile(filepath.Join(dir, "main.go"), []byte(main.String()), 0644) +} + +func generateStdlib(dir string) error { + if err := writeModule(dir, "example.com/llgo-bench/stdlib"); err != nil { + return err + } + return os.WriteFile(filepath.Join(dir, "main.go"), []byte(stdlibSource), 0644) +} + +func buildScenario(outDir string, sc scenario, v variant, llgoOpt string) buildResult { + bin := filepath.Join(outDir, "bin", 
safeName(v.Name)+"_"+sc.Name) + if v.LTO { + bin += "_lto" + } + if exeSuffix := executableSuffix(); exeSuffix != "" { + bin += exeSuffix + } + start := time.Now() + var cmd *exec.Cmd + switch v.Kind { + case "go": + cmd = exec.Command(v.Tool, "build", "-trimpath", "-o", bin, ".") + case "llgo": + args := []string{"build", "-trimpath", "-a", "-o", bin} + if llgoOpt != "" { + args = append(args, "-O"+llgoOpt) + } + if v.LTO { + args = append(args, "-lto=full") + } + args = append(args, ".") + cmd = exec.Command(v.Tool, args...) + default: + return buildResult{Variant: v.Name, Scenario: sc.Name, Binary: bin, Error: "unknown variant kind"} + } + cmd.Dir = sc.Dir + cmd.Env = os.Environ() + if v.Kind == "llgo" { + cmd.Env = append(cmd.Env, "LLGO_ROOT="+v.Root, "LLGO_FUNCINFO=1") + } + out, err := cmd.CombinedOutput() + br := buildResult{Variant: v.Name, Scenario: sc.Name, Binary: bin, BuildMS: time.Since(start).Milliseconds()} + if err != nil { + br.Error = strings.TrimSpace(string(out)) + if br.Error == "" { + br.Error = err.Error() + } + return br + } + info, err := os.Stat(bin) + if err != nil { + br.Error = err.Error() + return br + } + br.Size = info.Size() + return br +} + +func runScenario(sc scenario, v variant, bin string, runs, iters int) runResult { + scenarioIters := iterationsForScenario(sc.Kind, iters) + rr := runResult{ + Variant: v.Name, + Scenario: sc.Name, + Metrics: map[string][]int64{}, + Env: map[string]string{ + "BENCH_ITERS": strconv.Itoa(scenarioIters), + }, + } + for i := 0; i < runs; i++ { + cmd := exec.Command(bin) + cmd.Dir = sc.Dir + cmd.Env = append(os.Environ(), "BENCH_ITERS="+strconv.Itoa(scenarioIters)) + out, err := cmd.CombinedOutput() + if err != nil { + rr.Error = err.Error() + rr.Output = string(out) + return rr + } + metrics, err := parseMetrics(out) + if err != nil { + rr.Error = err.Error() + rr.Output = string(out) + return rr + } + for k, v := range metrics { + rr.Metrics[k] = append(rr.Metrics[k], v) + } + } + return rr +} + 
+func iterationsForScenario(name string, base int) int { + div := 1 + switch name { + case "deep": + div = 4 + case "multipkg", "cold", "stdlib": + div = 20 + } + n := base / div + if n < 1 { + return 1 + } + return n +} + +func parseMetrics(out []byte) (map[string]int64, error) { + metrics := map[string]int64{} + for _, raw := range strings.Split(string(out), "\n") { + line := strings.TrimSpace(raw) + if line == "" || !strings.Contains(line, "=") { + continue + } + name, value, _ := strings.Cut(line, "=") + if !strings.Contains(name, ".") { + continue + } + n, err := strconv.ParseInt(strings.TrimSpace(value), 10, 64) + if err != nil { + return nil, fmt.Errorf("parse metric %q: %w", line, err) + } + metrics[strings.TrimSpace(name)] = n + } + return metrics, nil +} + +func renderSummary(result resultFile) string { + var b strings.Builder + fmt.Fprintf(&b, "# Runtime Funcinfo Benchmark\n\nGenerated: `%s`\n\n", result.GeneratedAt.Format(time.RFC3339)) + b.WriteString("Cells are `best/trimmed avg`. Runtime metrics use `ns/op`; sizes use MiB.\n\n") + for _, sc := range result.ScenarioMeta { + if sc.TargetCount == 0 { + continue + } + switch sc.Kind { + case "multipkg": + fmt.Fprintf(&b, "`%s` uses `multipkg.FuncForPCMany` and `multipkg.FileLineMany` batch metrics over %d target functions (%d packages x %d functions).\n\n", + sc.Name, sc.TargetCount, sc.PackageCount, sc.MethodCount) + case "cold": + fmt.Fprintf(&b, "`%s` uses `cold.WarmFuncForPCMany` and `cold.WarmFileLineMany` batch metrics over %d target functions (%d packages x %d functions). 
`cold.First*` metrics are one per process and include lazy runtime initialization that has not already happened in that process.\n\n", + sc.Name, sc.TargetCount, sc.PackageCount, sc.MethodCount) + } + } + for _, sc := range result.Scenarios { + metrics := metricsForScenario(result.Runs, sc) + if len(metrics) == 0 { + continue + } + fmt.Fprintf(&b, "## %s Performance\n\n", sc) + b.WriteString("| metric |") + for _, v := range result.Variants { + b.WriteString(" " + v.Name + " |") + } + b.WriteString("\n|---|") + for range result.Variants { + b.WriteString("---:|") + } + b.WriteString("\n") + for _, metric := range metrics { + b.WriteString("| " + metric + " |") + for _, v := range result.Variants { + rr, found := findRun(result.Runs, v.Name, sc) + cell := "FAIL" + if found && rr.Error == "" { + cell = "n/a" + } + if vals := rr.Metrics[metric]; len(vals) != 0 { + cell = formatPerf(vals) + } + b.WriteString(" " + cell + " |") + } + b.WriteString("\n") + } + b.WriteString("\n") + } + b.WriteString("## Binary Size\n\n| scenario |") + for _, v := range result.Variants { + b.WriteString(" " + v.Name + " |") + } + b.WriteString("\n|---|") + for range result.Variants { + b.WriteString("---:|") + } + b.WriteString("\n") + for _, sc := range result.Scenarios { + b.WriteString("| " + sc + " |") + for _, v := range result.Variants { + cell := "FAIL" + if br := findBuild(result.Builds, v.Name, sc); br.Error == "" && br.Size > 0 { + cell = formatMiB(br.Size) + } + b.WriteString(" " + cell + " |") + } + b.WriteString("\n") + } + b.WriteString("\n## Build Time\n\n| scenario |") + for _, v := range result.Variants { + b.WriteString(" " + v.Name + " |") + } + b.WriteString("\n|---|") + for range result.Variants { + b.WriteString("---:|") + } + b.WriteString("\n") + for _, sc := range result.Scenarios { + b.WriteString("| " + sc + " |") + for _, v := range result.Variants { + cell := "FAIL" + if br := findBuild(result.Builds, v.Name, sc); br.Error == "" { + cell = 
formatDurationMS(br.BuildMS) + } + b.WriteString(" " + cell + " |") + } + b.WriteString("\n") + } + return b.String() +} + +func metricsForScenario(runs []runResult, scenario string) []string { + set := map[string]bool{} + for _, rr := range runs { + if rr.Scenario != scenario || rr.Error != "" { + continue + } + for k := range rr.Metrics { + set[k] = true + } + } + out := make([]string, 0, len(set)) + for k := range set { + out = append(out, k) + } + sort.Strings(out) + return out +} + +func findRun(runs []runResult, variant, scenario string) (runResult, bool) { + for _, rr := range runs { + if rr.Variant == variant && rr.Scenario == scenario { + return rr, true + } + } + return runResult{Metrics: map[string][]int64{}}, false +} + +func findBuild(builds []buildResult, variant, scenario string) buildResult { + for _, br := range builds { + if br.Variant == variant && br.Scenario == scenario { + return br + } + } + return buildResult{Error: "missing"} +} + +func formatPerf(values []int64) string { + if len(values) == 0 { + return "n/a" + } + sorted := append([]int64(nil), values...) 
+ sort.Slice(sorted, func(i, j int) bool { return sorted[i] < sorted[j] }) + best := sorted[0] + avgVals := sorted + if len(sorted) >= 3 { + avgVals = sorted[1 : len(sorted)-1] + } + var sum int64 + for _, v := range avgVals { + sum += v + } + avg := float64(sum) / float64(len(avgVals)) + return formatNS(float64(best)) + "/" + formatNS(avg) +} + +func formatNS(ns float64) string { + switch { + case ns >= 1e6: + return trimFloat(ns/1e6) + "ms" + case ns >= 1e3: + return trimFloat(ns/1e3) + "us" + default: + return trimFloat(ns) + "ns" + } +} + +func formatMiB(bytes int64) string { + return trimFloat(float64(bytes)/(1024*1024)) + " MiB" +} + +func formatDurationMS(ms int64) string { + if ms >= 1000 { + return trimFloat(float64(ms)/1000) + "s" + } + return strconv.FormatInt(ms, 10) + "ms" +} + +func trimFloat(v float64) string { + if math.Abs(v-math.Round(v)) < 0.05 { + return strconv.FormatInt(int64(math.Round(v)), 10) + } + return strconv.FormatFloat(v, 'f', 1, 64) +} + +func writeJSON(path string, data any) error { + raw, err := json.MarshalIndent(data, "", " ") + if err != nil { + return err + } + raw = append(raw, '\n') + return os.WriteFile(path, raw, 0644) +} + +func splitList(s string) []string { + var out []string + for _, part := range strings.Split(s, ",") { + part = strings.TrimSpace(part) + if part != "" { + out = append(out, part) + } + } + return out +} + +func parseScales(s string) ([]scenarioSize, error) { + var out []scenarioSize + for _, part := range splitList(s) { + left, right, ok := strings.Cut(part, "x") + if !ok { + left, right, ok = strings.Cut(part, "X") + } + if !ok { + return nil, fmt.Errorf("bad scale %q: want packages x methods, for example 12x12", part) + } + packages, err := strconv.Atoi(strings.TrimSpace(left)) + if err != nil || packages <= 0 { + return nil, fmt.Errorf("bad package count in scale %q", part) + } + methods, err := strconv.Atoi(strings.TrimSpace(right)) + if err != nil || methods <= 0 { + return nil, fmt.Errorf("bad 
method count in scale %q", part) + } + out = append(out, scenarioSize{Packages: packages, Methods: methods}) + } + return out, nil +} + +func scenarioNames(scenarios []scenario) []string { + out := make([]string, len(scenarios)) + for i, sc := range scenarios { + out[i] = sc.Name + } + return out +} + +func safeName(s string) string { + replacer := strings.NewReplacer("/", "_", "\\", "_", ":", "_", "+", "_") + return replacer.Replace(s) +} + +func executableSuffix() string { + if os.PathSeparator == '\\' { + return ".exe" + } + return "" +} + +func fatal(err error) { + fmt.Fprintln(os.Stderr, err) + os.Exit(1) +} + +const commonBenchHelpers = ` +func benchIters(def int) int { + if s := getenv("BENCH_ITERS"); s != "" { + n, err := atoi(s) + if err == nil && n > 0 { + return n + } + } + return def +} + +func measure(name string, n int, fn func()) { + fn() + start := time.Now() + for i := 0; i < n; i++ { + fn() + } + elapsed := time.Since(start).Nanoseconds() + if n <= 0 { + panic("bad iterations") + } + fmt.Printf("%s=%d\n", name, elapsed/int64(n)) +} + +func getenv(k string) string { + for _, kv := range os.Environ() { + if len(kv) > len(k) && kv[:len(k)] == k && kv[len(k)] == '=' { + return kv[len(k)+1:] + } + } + return "" +} + +func atoi(s string) (int, error) { + n := 0 + for _, r := range s { + if r < '0' || r > '9' { + return 0, fmt.Errorf("bad int") + } + n = n*10 + int(r-'0') + } + return n, nil +} +` + +const hotSource = `package main + +import ( + "fmt" + "os" + "reflect" + "runtime" + "time" +) + +var sinkInt int +var sinkPC uintptr +var sinkString string + +` + commonBenchHelpers + ` + +//go:noinline +func entryTarget(x int) int { + return x + 7 +} + +//go:noinline +func caller0() { + pc, file, line, ok := runtime.Caller(0) + if !ok || pc == 0 || file == "" || line == 0 { + panic("bad caller0") + } + sinkPC = pc + sinkString = file + sinkInt += line +} + +//go:noinline +func caller1() { + caller1Helper() +} + +//go:noinline +func caller1Helper() { + pc, 
file, line, ok := runtime.Caller(1) + if !ok || pc == 0 || file == "" || line == 0 { + panic("bad caller1") + } + sinkPC = pc + sinkString = file + sinkInt += line +} + +//go:noinline +func returnPC() uintptr { + pc, _, _, ok := runtime.Caller(0) + if !ok || pc == 0 { + panic("bad return pc") + } + return pc +} + +//go:noinline +func callersOnly() { + var pcs [16]uintptr + n := runtime.Callers(0, pcs[:]) + if n == 0 || pcs[0] == 0 { + panic("bad callers") + } + sinkPC = pcs[0] + sinkInt += n +} + +//go:noinline +func callersFramesFirst() { + var pcs [16]uintptr + n := runtime.Callers(0, pcs[:]) + frames := runtime.CallersFrames(pcs[:n]) + for { + frame, more := frames.Next() + if frame.Function != "" && frame.File != "" && frame.Line != 0 { + sinkString = frame.Function + sinkInt += frame.Line + return + } + if !more { + break + } + } + panic("bad frame") +} + +func callersFramesReady() (ok bool) { + defer func() { + if recover() != nil { + ok = false + } + }() + callersFramesFirst() + return true +} + +func main() { + iters := benchIters(200000) + entryPC := reflect.ValueOf(entryTarget).Pointer() + returnedPC := returnPC() + measure("hot.Caller0", iters, caller0) + measure("hot.Caller1", iters, caller1) + measure("hot.CallersOnly", iters, callersOnly) + if callersFramesReady() { + measure("hot.CallersFramesFirst", iters, callersFramesFirst) + } + if entryFn := runtime.FuncForPC(entryPC); entryFn != nil && entryFn.Name() != "" { + measure("hot.FuncForPCEntry", iters, func() { + fn := runtime.FuncForPC(entryPC) + if fn == nil { + panic("missing entry func") + } + sinkString = fn.Name() + }) + if file, line := entryFn.FileLine(entryPC); file != "" && line != 0 { + measure("hot.FuncFileLineEntry", iters, func() { + file, line := entryFn.FileLine(entryPC) + if file == "" || line == 0 { + panic("missing entry fileline") + } + sinkString = file + sinkInt += line + }) + } + } + if returnFn := runtime.FuncForPC(returnedPC); returnFn != nil && returnFn.Name() != "" { + 
measure("hot.FuncForPCReturnPC", iters, func() { + fn := runtime.FuncForPC(returnedPC) + if fn == nil { + panic("missing return func") + } + sinkString = fn.Name() + }) + if file, line := returnFn.FileLine(returnedPC); file != "" && line != 0 { + measure("hot.FuncFileLineReturnPC", iters, func() { + file, line := returnFn.FileLine(returnedPC) + if file == "" || line == 0 { + panic("missing return fileline") + } + sinkString = file + sinkInt += line + }) + } + } + fmt.Println("sink=", sinkInt, sinkPC, sinkString) +} +` + +const deepPrefix = `package main + +import ( + "fmt" + "os" + "runtime" + "time" +) + +var sinkInt int +var sinkPC uintptr +var sinkString string + +` + commonBenchHelpers + ` + +type callerIface interface { + call() +} + +type callerImpl struct{} + +//go:noinline +func (callerImpl) call() { + frame0() +} + +//go:noinline +func closureLayer(next func()) func() { + return func() { + next() + } +} + +//go:noinline +func callInterface(c callerIface) { + c.call() +} + +//go:noinline +func callClosure() { + closureLayer(closureLayer(frame0))() +} + +` + +const deepSuffix = `//go:noinline +func framesAll() { + frame0() + var pcs [64]uintptr + n := runtime.Callers(0, pcs[:]) + frames := runtime.CallersFrames(pcs[:n]) + total := 0 + for { + frame, more := frames.Next() + if frame.Function != "" { + total += len(frame.Function) + frame.Line + } + if !more { + break + } + } + if total == 0 { + panic("bad frames") + } + sinkInt += total +} + +func deepReady(fn func()) (ok bool) { + defer func() { + if recover() != nil { + ok = false + } + }() + fn() + return true +} + +func main() { + iters := benchIters(50000) + if deepReady(frame0) { + measure("deep.Direct32", iters, frame0) + } + if deepReady(func() { callInterface(callerImpl{}) }) { + measure("deep.Interface32", iters, func() { callInterface(callerImpl{}) }) + } + if deepReady(callClosure) { + measure("deep.Closure32", iters, callClosure) + } + if deepReady(framesAll) { + measure("deep.CallersFramesAll", 
iters, framesAll) + } + fmt.Println("sink=", sinkInt, sinkPC, sinkString) +} +` + +const stdlibSource = `package main + +import ( + "bytes" + "encoding/json" + "fmt" + "go/parser" + "go/token" + "net/netip" + "os" + "reflect" + "regexp" + "runtime" + "strings" + "text/template" + "time" +) + +var sinkInt int +var sinkString string + +` + commonBenchHelpers + ` + +type payload struct { + Name string + Items []int + Addr string +} + +//go:noinline +func stdTarget(x int) int { + return x*3 + 1 +} + +//go:noinline +func stdWork() { + p := payload{Name: "llgo", Items: []int{1, 2, 3, 5, 8}, Addr: "127.0.0.1:8080"} + raw, err := json.Marshal(p) + if err != nil { + panic(err) + } + var out payload + if err := json.Unmarshal(raw, &out); err != nil { + panic(err) + } + tmpl := template.Must(template.New("x").Funcs(template.FuncMap{"join": strings.Join}).Parse("{{.Name}}:{{join .Words \",\"}}")) + var buf bytes.Buffer + if err := tmpl.Execute(&buf, map[string]any{"Name": out.Name, "Words": []string{"a", "b", "c"}}); err != nil { + panic(err) + } + re := regexp.MustCompile("[a-z]+") + matches := re.FindAllString(buf.String(), -1) + expr, err := parser.ParseExpr("1 + 2*3") + if err != nil || expr == nil { + panic("bad parser") + } + fs := token.NewFileSet() + file := fs.AddFile("bench.go", -1, 100) + file.AddLine(10) + addr := netip.MustParseAddrPort(out.Addr) + sinkInt += len(matches) + int(addr.Port()) + int(file.Line(token.Pos(11))) + sinkString = buf.String() +} + +//go:noinline +func stdCaller() { + pc, file, line, ok := runtime.Caller(0) + if !ok || pc == 0 || file == "" || line == 0 { + panic("bad caller") + } + sinkInt += line + sinkString = file +} + +//go:noinline +func stdFrames() { + var pcs [16]uintptr + n := runtime.Callers(0, pcs[:]) + frames := runtime.CallersFrames(pcs[:n]) + for { + frame, more := frames.Next() + if frame.Function != "" && frame.File != "" && frame.Line != 0 { + sinkInt += frame.Line + sinkString = frame.Function + return + } + if !more { + 
break + } + } + panic("bad frame") +} + +func stdFramesReady() (ok bool) { + defer func() { + if recover() != nil { + ok = false + } + }() + stdFrames() + return true +} + +func main() { + iters := benchIters(50000) + entryPC := reflect.ValueOf(stdTarget).Pointer() + measure("stdlib.Work", iters/10, stdWork) + measure("stdlib.Caller0", iters, stdCaller) + if stdFramesReady() { + measure("stdlib.CallersFramesFirst", iters, stdFrames) + } + if fn := runtime.FuncForPC(entryPC); fn != nil && fn.Name() != "" { + measure("stdlib.FuncForPCEntry", iters, func() { + fn := runtime.FuncForPC(entryPC) + if fn == nil { + panic("missing func") + } + sinkString = fn.Name() + }) + if file, line := fn.FileLine(entryPC); file != "" && line != 0 { + measure("stdlib.FuncFileLineEntry", iters, func() { + file, line := fn.FileLine(entryPC) + if file == "" || line == 0 { + panic("missing fileline") + } + sinkInt += line + sinkString = file + }) + } + } + fmt.Println("sink=", sinkInt, sinkString) +} +` From 7eed0d067c8d2a50475c4f50339b239435f91e4a Mon Sep 17 00:00:00 2001 From: Li Jie Date: Thu, 2 Jul 2026 15:33:19 +0800 Subject: [PATCH 25/59] build,cl: emit funcinfo site sections on Mach-O macOS previously had no entry/stub/pcline site sections, so first-use funcinfo initialization fell back to one dlsym per function and per stub (13ms cold on a small binary, 27ms with LTO), and statement-level pc-line records did not exist at all. Emit the same site records on Mach-O: - __DATA,__llgo_fie / __llgo_stub / __llgo_pcl sections with the live_support attribute: under ld64/lld -dead_strip a live_support atom survives only if the atom it references (the anchor label inside the function body) is live, which matches the records-follow-function semantics ELF gets from SHF_LINK_ORDER with --gc-sections. - One lowercase-l linker-private symbol per record so each record is its own atom and dead functions drop exactly their own records. 
- Assembler-local (L-prefixed) pc-site labels: Mach-O subsections-via-symbols treats visible labels as atom boundaries, and a visible label in the middle of a function let the linker split and reorder function bodies. - Boundary symbols via ld64's section$start$/section$end$, emitted with the \x01 verbatim-name prefix so LLVM does not prepend the Mach-O underscore. - A no_dead_strip zero record per section in the main module keeps the sections (and their boundary symbols) present even when no package contributed records. Co-Authored-By: Claude Fable 5 --- cl/caller_frame_test.go | 15 ++- cl/instr.go | 32 +++++- internal/build/funcinfo_table.go | 167 ++++++++++++++++++++++++------- 3 files changed, 169 insertions(+), 45 deletions(-) diff --git a/cl/caller_frame_test.go b/cl/caller_frame_test.go index e5c9fd7f0a..ec252c14fb 100644 --- a/cl/caller_frame_test.go +++ b/cl/caller_frame_test.go @@ -652,7 +652,7 @@ func top() { } } -func TestCompileRuntimeCallerPCLineMetadataSkippedOnDarwin(t *testing.T) { +func TestCompileRuntimeCallerPCLineMetadataOnDarwin(t *testing.T) { ssapkg, files := buildCallerFrameSSAPackage(t, "example.com/foo", `package foo import "runtime" @@ -669,8 +669,17 @@ func top() { t.Fatal(err) } ir := pkg.Module().String() - if strings.Contains(ir, `!llgo.pcline`) || strings.Contains(ir, "__llgo_pcsite_") { - t.Fatalf("darwin should not emit inline asm pc-site labels:\n%s", ir) + for _, want := range []string{ + `!llgo.pcline`, + "__llgo_pcsite_", + `.pushsection __DATA,__llgo_pcl`, + } { + if !strings.Contains(ir, want) { + t.Fatalf("darwin should emit Mach-O pc-site labels, missing %q:\n%s", want, ir) + } + } + if strings.Contains(ir, `.pushsection llgo_pcline`) { + t.Fatalf("darwin must not use the ELF pcline section syntax:\n%s", ir) } } diff --git a/cl/instr.go b/cl/instr.go index 136b514a47..8f8cbf240d 100644 --- a/cl/instr.go +++ b/cl/instr.go @@ -1404,6 +1404,13 @@ func (p *context) emitPCLineLabel(b llssa.Builder, pos token.Pos) { 
p.pcLineSeq++ id := pcLineID(p.fn.Name(), p.pcLineSeq) label := pcLineLabelName(id) + if target.GOOS == "darwin" { + // Mach-O subsections-via-symbols treats every non-local symbol as an + // atom boundary; a visible label in the middle of a function body + // lets the linker split and reorder the function. The "L" prefix + // keeps the label assembler-local so the function stays one atom. + label = "L" + label + } asmLabel := label + "_${:uid}" ptrDirective := ".quad" align := "3" @@ -1411,10 +1418,22 @@ func (p *context) emitPCLineLabel(b llssa.Builder, pos token.Pos) { ptrDirective = ".long" align = "2" } + // Keep section names in sync with internal/build/funcinfo_table.go + // (pcLineSiteSectionInfo). ELF ties the record to the function via + // SHF_LINK_ORDER (honored by --gc-sections); Mach-O uses a live_support + // section plus one linker-private atom symbol per record so -dead_strip + // keeps a record exactly when the function containing its label is live. + pushSection := ".pushsection llgo_pcline,\"ao\",@progbits," + asmQuoteSymbol(p.fn.Name()) + recordSymbol := "" + if target.GOOS == "darwin" { + pushSection = ".pushsection __DATA,__llgo_pcl,regular,live_support" + recordSymbol = "l_llgo_pcline_rec_${:uid}:\n" + } b.InlineAsm( asmLabel + ":\n" + - ".pushsection llgo_pcline,\"ao\",@progbits," + asmQuoteSymbol(p.fn.Name()) + "\n" + + pushSection + "\n" + ".p2align " + align + "\n" + + recordSymbol + ptrDirective + " " + asmLabel + "\n" + ".quad " + uint64Hex(id) + "\n" + ".popsection", @@ -1429,9 +1448,14 @@ func canEmitPCLineLabelsForTarget(target *llssa.Target) bool { if target.Target != "" || target.GOARCH == "wasm" { return false } - // This path uses ELF SHF_LINK_ORDER section syntax. Darwin needs a Mach-O - // live_support section path, and other object formats need separate support. 
- return target.GOOS == "linux" + // ELF uses SHF_LINK_ORDER associated sections; Mach-O uses live_support + // __DATA,__llgo_pcl sections (records follow their function under + // -dead_strip). Other object formats need separate support. + switch target.GOOS { + case "linux", "darwin": + return true + } + return false } func pcLineID(symbol string, seq uint64) uint64 { diff --git a/internal/build/funcinfo_table.go b/internal/build/funcinfo_table.go index de89bab7c3..b6f3857ce0 100644 --- a/internal/build/funcinfo_table.go +++ b/internal/build/funcinfo_table.go @@ -46,12 +46,6 @@ const ( pcLineCountSymbol = "__llgo_pcline_count" pcSiteStartPtrSymbol = "__llgo_pcsite_start" pcSiteEndPtrSymbol = "__llgo_pcsite_end" - funcInfoEntryStartSymbol = "__start_llgo_funcinfo_entry" - funcInfoEntryEndSymbol = "__stop_llgo_funcinfo_entry" - funcInfoStubSiteStartSymbol = "__start_llgo_funcinfo_stubsite" - funcInfoStubSiteEndSymbol = "__stop_llgo_funcinfo_stubsite" - pcSiteStartSymbol = "__start_llgo_pcline" - pcSiteEndSymbol = "__stop_llgo_pcline" funcInfoDataSymbol = "__llgo_funcinfo_table$data" pcLineDataSymbol = "__llgo_pcline_table$data" funcInfoStringsDataSymbol = "__llgo_funcinfo_strings$data" @@ -452,9 +446,10 @@ func emitFuncInfoTable(ctx *context, pkg llssa.Package, records []funcInfoRecord llvm.ConstInt(countType, 0, false), })) pcLineCount.SetInitializer(llvm.ConstInt(countType, uint64(len(encoded.PCLines)), false)) - if shouldEmitRuntimeELFSites(ctx) { - pcSiteStart := llvm.AddGlobal(mod, pcSiteRecordType, pcSiteStartSymbol) - pcSiteEnd := llvm.AddGlobal(mod, pcSiteRecordType, pcSiteEndSymbol) + if shouldEmitRuntimeSites(ctx) { + startName, endName := pcLineSiteSectionInfo.boundary(shouldEmitRuntimeMachOSites(ctx)) + pcSiteStart := llvm.AddGlobal(mod, pcSiteRecordType, startName) + pcSiteEnd := llvm.AddGlobal(mod, pcSiteRecordType, endName) pcSiteStartPtr.SetInitializer(pcSiteStart) pcSiteEndPtr.SetInitializer(pcSiteEnd) } else { @@ -462,13 +457,15 @@ func
emitFuncInfoTable(ctx *context, pkg llssa.Package, records []funcInfoRecord pcSiteEndPtr.SetInitializer(llvm.ConstPointerNull(pcSiteEndPtr.GlobalValueType())) } } - emitELFSites := shouldEmitRuntimeELFSites(ctx) + machOSites := shouldEmitRuntimeMachOSites(ctx) + emitSites := shouldEmitRuntimeSites(ctx) emitEntrySites := shouldEmitRuntimeEntryELFSites(ctx) && len(encoded.Records) != 0 emitStubSites := shouldEmitRuntimeStubELFSites(ctx) - emitRuntimeFuncInfoELFSites(mod, ctx.prog.PointerSize(), emitELFSites && len(pcLineValues) != 0, emitEntrySites, emitStubSites && len(stubRecords) != 0) + emitRuntimeFuncInfoSites(mod, ctx.prog.PointerSize(), machOSites, emitSites && len(pcLineValues) != 0, emitEntrySites, emitStubSites && len(stubRecords) != 0) if emitEntrySites { - entryStart := llvm.AddGlobal(mod, funcEntryRecordType, funcInfoEntryStartSymbol) - entryEnd := llvm.AddGlobal(mod, funcEntryRecordType, funcInfoEntryEndSymbol) + startName, endName := entrySiteSectionInfo.boundary(machOSites) + entryStart := llvm.AddGlobal(mod, funcEntryRecordType, startName) + entryEnd := llvm.AddGlobal(mod, funcEntryRecordType, endName) entryStartPtr.SetInitializer(entryStart) entryEndPtr.SetInitializer(entryEnd) } else { @@ -476,8 +473,9 @@ func emitFuncInfoTable(ctx *context, pkg llssa.Package, records []funcInfoRecord entryEndPtr.SetInitializer(llvm.ConstPointerNull(entryEndPtr.GlobalValueType())) } if emitStubSites && len(stubRecords) != 0 { - stubSiteStart := llvm.AddGlobal(mod, stubSiteRecordType, funcInfoStubSiteStartSymbol) - stubSiteEnd := llvm.AddGlobal(mod, stubSiteRecordType, funcInfoStubSiteEndSymbol) + startName, endName := stubSiteSectionInfo.boundary(machOSites) + stubSiteStart := llvm.AddGlobal(mod, stubSiteRecordType, startName) + stubSiteEnd := llvm.AddGlobal(mod, stubSiteRecordType, endName) stubSiteStartPtr.SetInitializer(stubSiteStart) stubSiteEndPtr.SetInitializer(stubSiteEnd) } else { @@ -607,12 +605,100 @@ func shouldEmitRuntimeELFSites(ctx *context) bool { 
ctx.buildConf.Target == "" } +func shouldEmitRuntimeMachOSites(ctx *context) bool { + return ctx != nil && + ctx.buildConf != nil && + ctx.buildConf.Goos == "darwin" && + ctx.buildConf.Target == "" +} + +// shouldEmitRuntimeSites reports whether the target object format has a +// DCE-safe section story for metadata site records. ELF uses SHF_LINK_ORDER +// associated sections (honored by --gc-sections). Mach-O uses live_support +// sections: under ld64/lld -dead_strip a live_support atom survives only if +// the atom it references (the anchor inside the function body) is live, which +// is the same records-follow-function semantics. +func shouldEmitRuntimeSites(ctx *context) bool { + return shouldEmitRuntimeELFSites(ctx) || shouldEmitRuntimeMachOSites(ctx) +} + func shouldEmitRuntimeStubELFSites(ctx *context) bool { - return shouldEmitRuntimeELFSites(ctx) + return shouldEmitRuntimeSites(ctx) } func shouldEmitRuntimeEntryELFSites(ctx *context) bool { - return shouldEmitRuntimeELFSites(ctx) + return shouldEmitRuntimeSites(ctx) +} + +// siteSectionInfo names one metadata site section in both object formats. +// Mach-O section names are capped at 16 characters, hence the short forms. 
+type siteSectionInfo struct { + elf string + machO string +} + +var ( + entrySiteSectionInfo = siteSectionInfo{elf: "llgo_funcinfo_entry", machO: "__DATA,__llgo_fie"} + stubSiteSectionInfo = siteSectionInfo{elf: "llgo_funcinfo_stubsite", machO: "__DATA,__llgo_stub"} + pcLineSiteSectionInfo = siteSectionInfo{elf: "llgo_pcline", machO: "__DATA,__llgo_pcl"} +) + +func (s siteSectionInfo) push(machO bool, anchor string) string { + if machO { + return ".pushsection " + s.machO + ",regular,live_support" + } + return ".pushsection " + s.elf + ",\"ao\",@progbits," + anchor +} + +// recordSymbol returns the extra label line each Mach-O record needs: the +// lowercase-l linker-private symbol splits the section into one atom per +// record, so -dead_strip can drop records individually, and the symbol itself +// is discarded at link time. ELF needs nothing here. +func (s siteSectionInfo) recordSymbol(machO bool, kind string) string { + if !machO { + return "" + } + return "l_llgo_" + kind + "_rec_${:uid}:\n" +} + +func (s siteSectionInfo) retain(machO bool) string { + if machO { + return ".section " + s.machO + ",regular,live_support" + } + return ".section " + s.elf + ",\"aR\",@progbits" +} + +// retainSymbol returns the label lines that pin the zero record under +// -dead_strip on Mach-O; nothing references the zero record, so it must be a +// no_dead_strip atom for the section (and its boundary symbols) to survive. +func (s siteSectionInfo) retainSymbol(machO bool, kind string) string { + if !machO { + return "" + } + sym := "l_llgo_" + kind + "_zero" + return sym + ":\n.no_dead_strip " + sym + "\n" +} + +// boundary returns the linker-synthesized section boundary symbols: ELF +// __start_/__stop_ for C-identifier section names, ld64 section$start$/ +// section$end$ for Mach-O. +func (s siteSectionInfo) boundary(machO bool) (start, end string) { + if machO { + base := strings.Replace(s.machO, ",", "$", 1) + // The \x01 prefix makes LLVM emit the name verbatim. 
Without it the + // Mach-O mangler prepends an underscore and the linker no longer + // recognizes the exact section$start$SEG$SECT boundary spelling. + return "\x01section$start$" + base, "\x01section$end$" + base + } + return "__start_" + s.elf, "__stop_" + s.elf +} + +func siteAnchorLabel(machO bool, kind string) string { + if machO { + // Mach-O assembler-local labels use the plain "L" prefix. + return "Lllgo_" + kind + "_anchor_${:uid}" + } + return ".Lllgo_" + kind + "_anchor_${:uid}" } func emitFuncInfoEntrySites(ctx *context, pkg llssa.Package) { @@ -634,12 +720,13 @@ func emitFuncInfoEntrySites(ctx *context, pkg llssa.Package) { return } // This is LLGo's DCE-safe substitute for the function PC list that Go's - // linker has while building pclntab. The inline-asm fragment lives in an - // associated ELF section tied to the function body, so global DCE removes - // the entry record with the function instead of keeping dead code alive. - // Runtime still sorts these final PCs before building the Go-style - // findfunc bucket index, because LLVM IR generation does not know final - // linked text order. + // linker has while building pclntab. The inline-asm fragment lives in a + // section tied to the function body (SHF_LINK_ORDER on ELF; live_support + // atoms on Mach-O, on top of IR-level global DCE), so dead + // functions do not leave stale entry records behind. Runtime still sorts + // these final PCs before building the Go-style findfunc bucket index, + // because LLVM IR generation does not know final linked text order.
+ machO := shouldEmitRuntimeMachOSites(ctx) llvmCtx := mod.Context() builder := llvmCtx.NewBuilder() defer builder.Dispose() @@ -669,10 +756,11 @@ func emitFuncInfoEntrySites(ctx *context, pkg llssa.Package) { } else { builder.SetInsertPointBefore(first) } - anchor := ".Lllgo_funcinfo_entry_anchor_${:uid}" + anchor := siteAnchorLabel(machO, "funcinfo_entry") instruction := anchor + ":\n" + - ".pushsection llgo_funcinfo_entry,\"ao\",@progbits," + anchor + "\n" + + entrySiteSectionInfo.push(machO, anchor) + "\n" + ".p2align " + align + "\n" + + entrySiteSectionInfo.recordSymbol(machO, "funcinfo_entry") + ptrDirective + " " + anchor + "\n" + ".quad " + uint64Hex(symbolID) + "\n" + ".popsection" @@ -685,6 +773,7 @@ func emitFuncInfoStubSites(ctx *context, pkg llssa.Package) { if !shouldEmitRuntimeStubELFSites(ctx) || pkg == nil || !ctx.prog.FuncInfoMetadataEnabled() { return } + machO := shouldEmitRuntimeMachOSites(ctx) mod := pkg.Module() llvmCtx := mod.Context() builder := llvmCtx.NewBuilder() @@ -715,10 +804,11 @@ func emitFuncInfoStubSites(ctx *context, pkg llssa.Package) { } else { builder.SetInsertPointBefore(first) } - anchor := ".Lllgo_funcinfo_stubsite_anchor_${:uid}" + anchor := siteAnchorLabel(machO, "funcinfo_stubsite") instruction := anchor + ":\n" + - ".pushsection llgo_funcinfo_stubsite,\"ao\",@progbits," + anchor + "\n" + + stubSiteSectionInfo.push(machO, anchor) + "\n" + ".p2align " + align + "\n" + + stubSiteSectionInfo.recordSymbol(machO, "funcinfo_stubsite") + ptrDirective + " " + anchor + "\n" + ".quad " + uint64Hex(funcInfoSymbolID(target)) + "\n" + ".popsection" @@ -755,7 +845,10 @@ func uint64Hex(v uint64) string { return string(buf[:]) } -func emitRuntimeFuncInfoELFSites(mod llvm.Module, pointerSize int, pcSite bool, entrySite bool, stubSite bool) { +// emitRuntimeFuncInfoSites emits one zero record per used site section so the +// section always exists and the linker-synthesized boundary symbols resolve +// even when no package contributed 
records. Runtime skips zero records. +func emitRuntimeFuncInfoSites(mod llvm.Module, pointerSize int, machO bool, pcSite bool, entrySite bool, stubSite bool) { if !pcSite && !entrySite && !stubSite { return } @@ -766,23 +859,21 @@ func emitRuntimeFuncInfoELFSites(mod llvm.Module, pointerSize int, pcSite bool, align = "2" } var asm strings.Builder - if pcSite { - asm.WriteString(".section llgo_pcline,\"aR\",@progbits\n") + writeZeroRecord := func(info siteSectionInfo, kind string) { + asm.WriteString(info.retain(machO) + "\n") asm.WriteString(".p2align " + align + "\n") + asm.WriteString(info.retainSymbol(machO, kind)) asm.WriteString(ptrDirective + " 0\n") asm.WriteString(".quad 0\n") } + if pcSite { + writeZeroRecord(pcLineSiteSectionInfo, "pcline") + } if entrySite { - asm.WriteString(".section llgo_funcinfo_entry,\"aR\",@progbits\n") - asm.WriteString(".p2align " + align + "\n") - asm.WriteString(ptrDirective + " 0\n") - asm.WriteString(".quad 0\n") + writeZeroRecord(entrySiteSectionInfo, "funcinfo_entry") } if stubSite { - asm.WriteString(".section llgo_funcinfo_stubsite,\"aR\",@progbits\n") - asm.WriteString(".p2align " + align + "\n") - asm.WriteString(ptrDirective + " 0\n") - asm.WriteString(".quad 0\n") + writeZeroRecord(stubSiteSectionInfo, "funcinfo_stubsite") } mod.SetInlineAsm(asm.String()) } From e0aefe145d933f4a9938f091aa38e32a850934dc Mon Sep 17 00:00:00 2001 From: Li Jie Date: Thu, 2 Jul 2026 15:33:34 +0800 Subject: [PATCH 26/59] runtime: cut funcinfo first-use latency and index degradation First-use initialization: - Skip the per-stub dlsym loop when the stub-site section provided the frames; each dlsym is a dynamic-loader query and the loop dominated cold latency. - Materialize per-function strings and entry PCs once per function and packed file strings once per file ID during pcline table construction instead of once per site. 
Cold FuncForPC fast path: before the frame table exists, resolve exact function-value PCs with a bounded linear scan of the raw entry-site and stub-site sections (compile-time data, no loader query), then one dladdr as fallback; both require an entry match within the warm path's slack so stripped-local misattribution is impossible. The path is budgeted: after a handful of cold lookups the sorted table amortizes better, so it is built as usual. cold.FirstFuncForPC drops from 13ms to ~35us on macOS. Find index: subbucket deltas are now uint16 and the whole-index abandonment on delta overflow is gone. Go stores uint8 deltas because its linker guarantees a 16-byte minimum function size; LLGo indexes call-site records that sit a few bytes apart, and a dense 4KiB bucket silently degraded every lookup in the process to a full binary search. A delta counts deduplicated PCs inside one bucket, so it is bounded by the bucket size and uint16 cannot overflow. Observability: LLGO_FUNCINFO_DEBUG=1 prints one line per lazily built table (frame/bucket counts, index built or fallback, sites vs dlsym sources) so benchmarks can tell which path they measured. Co-Authored-By: Claude Fable 5 --- .../lib/runtime/pprof_runtime_stub_llgo.go | 22 ++ runtime/internal/lib/runtime/symtab.go | 240 ++++++++++++++++-- 2 files changed, 234 insertions(+), 28 deletions(-) diff --git a/runtime/internal/lib/runtime/pprof_runtime_stub_llgo.go b/runtime/internal/lib/runtime/pprof_runtime_stub_llgo.go index efdc55c49c..c8528de635 100644 --- a/runtime/internal/lib/runtime/pprof_runtime_stub_llgo.go +++ b/runtime/internal/lib/runtime/pprof_runtime_stub_llgo.go @@ -118,6 +118,28 @@ func funcForPCSlow(pc uintptr) *Func { return fn } } else if pc != 0 { + // Cold fast path: before the entry frame table has been built, resolve + // an exact function-entry PC without paying first-use table + // construction. 
First a bounded linear scan of the raw entry-site + // section (compile-time data, no dynamic-loader query), then one + // dladdr as fallback. Requiring an exact entry match means a + // stripped-local misattribution (dladdr returning the nearest + // exported symbol) can never be accepted, so this path only ever + // answers true function-value PCs. The path is intentionally capped: + // each cold lookup costs microseconds, so after a handful of them the + // sorted table is the cheaper answer and we fall through to build it. + if !runtimeFuncPCFramesBuilt() && coldFuncPCLookupBudget() { + if sym, ok := coldFuncInfoEntryLookup(pc); ok { + fn := newFuncForPC(pc, sym) + cacheFuncForPC(pc, fn) + return fn + } + if sym := addrInfoSymbol(pc); sym.ok && sym.entry == pc && sym.function != "" { + fn := newFuncForPC(pc, sym) + cacheFuncForPC(pc, fn) + return fn + } + } // Function-value PCs point at the real function entry. ELF funcinfo // entry-site anchors are emitted from LLVM IR and can land after the // backend prologue, so an exact entry PC may sort before its anchor. 
diff --git a/runtime/internal/lib/runtime/symtab.go b/runtime/internal/lib/runtime/symtab.go index c56d3bba5c..2ed6d3f9ab 100644 --- a/runtime/internal/lib/runtime/symtab.go +++ b/runtime/internal/lib/runtime/symtab.go @@ -9,6 +9,7 @@ import ( c "github.com/goplus/llgo/runtime/internal/clite" clitedebug "github.com/goplus/llgo/runtime/internal/clite/debug" + cliteos "github.com/goplus/llgo/runtime/internal/clite/os" latomic "github.com/goplus/llgo/runtime/internal/lib/sync/atomic" rtdebug "github.com/goplus/llgo/runtime/internal/runtime" ) @@ -249,7 +250,7 @@ type runtimeFuncPCFrame struct { type runtimePCFindBucket struct { idx uint32 - subbuckets [runtimePCFindSubbucket]uint8 + subbuckets [runtimePCFindSubbucket]uint16 } type runtimePCFindIndex struct { @@ -259,8 +260,13 @@ type runtimePCFindIndex struct { const ( // Keep the lookup geometry aligned with Go's pclntab findfunc table: - // 4096-byte buckets, 16 subbuckets, and one-byte function-index deltas. - // LLGo currently builds this compact index at first use after reading + // 4096-byte buckets and 16 subbuckets. Go stores one-byte subbucket + // deltas because its linker guarantees a 16-byte minimum function size; + // LLGo has no minimum size for function entries and indexes call-site + // records that can sit a few bytes apart, so it stores two-byte deltas. + // A delta counts distinct PCs inside one 4096-byte bucket and therefore + // can never exceed 4096, which makes uint16 overflow impossible and the + // index unconditional. LLGo builds the index at first use after reading // DCE-safe entry PC sections, because the LLVM IR stage does not yet own // final text addresses the way cmd/link does for Go. runtimePCMinFuncSize = uintptr(16) @@ -663,6 +669,68 @@ func initRuntimeFuncPCFrames() { initRuntimeFuncPCFramesSlow() } +// runtimeFuncPCFramesBuilt reports whether the entry frame table has already +// been constructed, without triggering its construction. 
+func runtimeFuncPCFramesBuilt() bool { + return latomic.LoadUint32(&runtimeFuncPCInitState) == runtimeFuncInfoInitDone +} + +// Set LLGO_FUNCINFO_DEBUG=1 to print one line per lazily built runtime +// metadata table. This is how benchmarks and bug reports can tell whether a +// lookup used the compact find index or a degraded full-table fallback. +var runtimeFuncInfoDebugState uint32 + +var runtimeFuncPCFramesFromSites bool +var runtimeFuncPCStubsFromSites bool + +func runtimeFuncInfoDebugEnabled() bool { + state := latomic.LoadUint32(&runtimeFuncInfoDebugState) + if state == 0 { + state = 1 + if p := cliteos.Getenv(c.AllocaCStr("LLGO_FUNCINFO_DEBUG")); p != nil { + if v := c.GoString(p); v != "" && v != "0" { + state = 2 + } + } + latomic.StoreUint32(&runtimeFuncInfoDebugState, state) + } + return state == 2 +} + +func runtimeFuncInfoDebugSource(fromSites bool) string { + if fromSites { + return "sites" + } + return "dlsym" +} + +func runtimeFuncInfoDebugIndex(index runtimePCFindIndex) string { + if len(index.buckets) != 0 { + return "built" + } + return "fallback" +} + +func reportRuntimeFuncPCDebug() { + if !runtimeFuncInfoDebugEnabled() { + return + } + println("llgo funcinfo: func table frames=", len(runtimeFuncPCFrames), + " buckets=", len(runtimeFuncPCIndex.buckets), + " index=", runtimeFuncInfoDebugIndex(runtimeFuncPCIndex), + " entries=", runtimeFuncInfoDebugSource(runtimeFuncPCFramesFromSites), + " stubs=", runtimeFuncInfoDebugSource(runtimeFuncPCStubsFromSites)) +} + +func reportRuntimePCLineDebug() { + if !runtimeFuncInfoDebugEnabled() { + return + } + println("llgo funcinfo: pcline table frames=", len(runtimePCLineFrames), + " buckets=", len(runtimePCLineIndex.buckets), + " index=", runtimeFuncInfoDebugIndex(runtimePCLineIndex)) +} + func initRuntimeFuncPCFramesSlow() { for { state := latomic.LoadUint32(&runtimeFuncPCInitState) @@ -673,6 +741,7 @@ func initRuntimeFuncPCFramesSlow() { if latomic.CompareAndSwapUint32(&runtimeFuncPCInitState, 
runtimeFuncInfoInitUninit, runtimeFuncInfoInitBusy) { initRuntimeFuncPCFramesOnce() latomic.StoreUint32(&runtimeFuncPCInitState, runtimeFuncInfoInitDone) + reportRuntimeFuncPCDebug() return } } @@ -712,12 +781,14 @@ func initRuntimeFuncPCFramesOnce() { } } } - frames = appendRuntimeFuncInfoStubSiteFrames(frames) + frames, usedStubSites := appendRuntimeFuncInfoStubSiteFrames(frames) // Closure stubs are an ABI adapter and may go away in a future closure // lowering. Keep the fallback compatibility table light: it stores only - // target funcinfo record indexes. On ELF we prefer the associated stub-site - // section above because linkers do not expose local stubs through dlsym. - if runtimeFuncInfoStubIndexes != nil && runtimeFuncInfoStubCount != 0 && runtimeFuncInfoStubCount <= runtimeFuncInfoCount { + // target funcinfo record indexes. When the stub-site section is present it + // is authoritative (linkers do not expose local stubs through dlsym), and + // skipping the dlsym loop below matters: each dlsym is a dynamic-loader + // query, and one query per stub used to dominate first-use latency. 
+ if !usedStubSites && runtimeFuncInfoStubIndexes != nil && runtimeFuncInfoStubCount != 0 && runtimeFuncInfoStubCount <= runtimeFuncInfoCount { if symbolBuf == nil { symbolBuf = make([]byte, 0, maxFuncInfoSymbolLen()+len(runtimeClosureStubPrefix)+1) } @@ -742,6 +813,8 @@ func initRuntimeFuncPCFramesOnce() { runtimeFuncPCFrames = frames runtimeFuncPCEntries = entries runtimeFuncPCIndex = buildRuntimeFuncPCIndex(frames) + runtimeFuncPCFramesFromSites = usedEntrySites + runtimeFuncPCStubsFromSites = usedStubSites } func appendRuntimeFuncInfoEntryFrames(frames []runtimeFuncPCFrame, entries []uintptr) ([]runtimeFuncPCFrame, bool) { @@ -780,20 +853,21 @@ func appendRuntimeFuncInfoEntryFrames(frames []runtimeFuncPCFrame, entries []uin return frames, used } -func appendRuntimeFuncInfoStubSiteFrames(frames []runtimeFuncPCFrame) []runtimeFuncPCFrame { +func appendRuntimeFuncInfoStubSiteFrames(frames []runtimeFuncPCFrame) ([]runtimeFuncPCFrame, bool) { if runtimeFuncInfoStubSiteStart == nil || runtimeFuncInfoStubSiteEnd == nil { - return frames + return frames, false } start := uintptr(unsafe.Pointer(runtimeFuncInfoStubSiteStart)) end := uintptr(unsafe.Pointer(runtimeFuncInfoStubSiteEnd)) size := unsafe.Sizeof(*runtimeFuncInfoStubSiteStart) if end <= start || size == 0 || (end-start)%size != 0 { - return frames + return frames, false } nsite := (end - start) / size if nsite > runtimeFuncInfoCount*16 || nsite > 1<<20 { - return frames + return frames, false } + used := false for i := uintptr(0); i < nsite; i++ { site := (*runtimeFuncInfoStubSiteRecord)(unsafe.Pointer(start + i*size)) if site == nil || site.pc == 0 || site.symbolID == 0 { @@ -807,8 +881,9 @@ func appendRuntimeFuncInfoStubSiteFrames(frames []runtimeFuncPCFrame) []runtimeF entry: site.pc, funcIndex: funcIndex, }) + used = true } - return frames + return frames, used } func funcInfoIndexForSymbolID(symbolID uint64) uint32 { @@ -949,11 +1024,13 @@ func buildRuntimeFuncPCIndex(frames []runtimeFuncPCFrame) 
runtimePCFindIndex { if subIdx > len(frames)-1 { subIdx = len(frames) - 1 } + // delta counts deduplicated PCs inside one bucket, so it is + // bounded by the bucket size and always fits in uint16. delta := subIdx - baseIdx - if delta < 0 || delta > 255 { + if delta < 0 || delta > 0xffff { return runtimePCFindIndex{} } - buckets[b].subbuckets[s] = uint8(delta) + buckets[b].subbuckets[s] = uint16(delta) } } return runtimePCFindIndex{base: base, buckets: buckets} @@ -1048,6 +1125,84 @@ func funcEntryForIndex(index uint32) uintptr { return runtimeFuncPCEntries[index] } +// coldFuncInfoEntryLookup resolves an exact function-entry PC by scanning the +// raw entry-site and stub-site sections, without building the sorted frame +// table and without any dynamic-loader query. Function values can point at +// either a real function entry or its closure stub, so both sections are +// scanned. The scan is linear, so it is capped: for larger binaries the +// dladdr cold path is cheaper than streaming the whole section. +const coldFuncInfoEntryScanLimit = 4096 + +// coldFuncInfoScanRange scans one {pc, symbolID} record section for the +// anchor nearest at-or-after pc within the warm path's entry slack (anchors +// are emitted from LLVM IR and land after the backend prologue). It returns +// the matched funcinfo index and delta, or (0, bestDelta) unchanged on miss. 
+func coldFuncInfoScanRange(start, end, size, pc uintptr, bestDelta uintptr) (uint32, uintptr) { + if start == 0 || end <= start || size == 0 || (end-start)%size != 0 { + return 0, bestDelta + } + nsite := (end - start) / size + if nsite > coldFuncInfoEntryScanLimit || nsite > runtimeFuncInfoCount*16 { + return 0, bestDelta + } + bestIndex := uint32(0) + for i := uintptr(0); i < nsite; i++ { + site := (*runtimeFuncInfoEntryRecord)(unsafe.Pointer(start + i*size)) + if site.symbolID == 0 || site.pc < pc { + continue + } + delta := site.pc - pc + if delta >= bestDelta { + continue + } + funcIndex := funcInfoIndexForSymbolID(site.symbolID) + if funcIndex == 0 || uintptr(funcIndex) > runtimeFuncInfoCount { + continue + } + bestDelta = delta + bestIndex = funcIndex + if delta == 0 { + break + } + } + return bestIndex, bestDelta +} + +// coldFuncPCLookupBudget grants a small number of table-free cold lookups per +// process; past that, building the sorted table amortizes better than more +// linear scans or dladdr calls. 
+var coldFuncPCLookupCount uint32 + +func coldFuncPCLookupBudget() bool { + return latomic.AddUint32(&coldFuncPCLookupCount, 1) <= 8 +} + +func coldFuncInfoEntryLookup(pc uintptr) (pcSymbol, bool) { + if pc == 0 { + return pcSymbol{}, false + } + bestDelta := uintptr(runtimeFuncPCEntrySlack) + 1 + bestIndex := uint32(0) + if runtimeFuncInfoEntryStart != nil && runtimeFuncInfoEntryEnd != nil { + bestIndex, bestDelta = coldFuncInfoScanRange( + uintptr(unsafe.Pointer(runtimeFuncInfoEntryStart)), + uintptr(unsafe.Pointer(runtimeFuncInfoEntryEnd)), + unsafe.Sizeof(*runtimeFuncInfoEntryStart), pc, bestDelta) + } + if bestDelta != 0 && runtimeFuncInfoStubSiteStart != nil && runtimeFuncInfoStubSiteEnd != nil { + if idx, _ := coldFuncInfoScanRange( + uintptr(unsafe.Pointer(runtimeFuncInfoStubSiteStart)), + uintptr(unsafe.Pointer(runtimeFuncInfoStubSiteEnd)), + unsafe.Sizeof(*runtimeFuncInfoStubSiteStart), pc, bestDelta); idx != 0 { + bestIndex = idx + } + } + if bestIndex == 0 { + return pcSymbol{}, false + } + return pcSymbolForFuncInfoIndex(pc, pc, bestIndex) +} + func funcPCFrameForPC(pc uintptr) (pcSymbol, bool) { if pc == 0 { return pcSymbol{}, false @@ -1123,6 +1278,7 @@ func initRuntimePCLineFramesSlow() { if latomic.CompareAndSwapUint32(&runtimePCLineInitState, runtimeFuncInfoInitUninit, runtimeFuncInfoInitBusy) { initRuntimePCLineFramesOnce() latomic.StoreUint32(&runtimePCLineInitState, runtimeFuncInfoInitDone) + reportRuntimePCLineDebug() return } } @@ -1156,6 +1312,18 @@ func initRuntimePCLineFramesOnce() { } frames := make([]runtimePCLineFrame, 0, nsite) symbolBuf := make([]byte, 0, maxFuncInfoSymbolLen()+1) + // Sites vastly outnumber distinct functions and files, so materialize the + // per-function strings and entry PCs once and the packed file strings once + // per file ID. Building them per site used to dominate first-use latency. 
+ type pcLineFuncInfo struct { + entry uintptr + function string + file string + line int + resolved bool + } + funcCache := make([]pcLineFuncInfo, runtimeFuncInfoCount+1) + fileCache := make(map[uint32]string) for i := uintptr(0); i < nsite; i++ { site := (*runtimePCSiteRecord)(unsafe.Pointer(start + i*size)) if site == nil || site.id == 0 || site.pc == 0 { @@ -1167,33 +1335,47 @@ func initRuntimePCLineFramesOnce() { } pc := site.pc fn := funcInfoAt(uintptr(rec.funcIndex) - 1) - entry := funcEntryForIndex(rec.funcIndex) - if entry == 0 { - entry = symbolPCFuncInfoName(symbolBuf, fn.symbolPkg, fn.symbolName) + fc := &funcCache[rec.funcIndex] + if !fc.resolved { + fc.entry = funcEntryForIndex(rec.funcIndex) + if fc.entry == 0 { + fc.entry = symbolPCFuncInfoName(symbolBuf, fn.symbolPkg, fn.symbolName) + } + fc.function = publicFunctionName(funcInfoJoinName(fn.namePkg, fn.nameName)) + if fc.function == "" { + fc.function = publicFunctionName(funcInfoJoinName(fn.symbolPkg, fn.symbolName)) + } + fc.file = funcInfoJoinFile(fn.fileRoot, fn.fileName) + fc.line = int(fn.line) + fc.resolved = true } + entry := fc.entry if entry == 0 { sym := addrInfoSymbol(pc) entry = sym.entry } - file := funcInfoPackedFile(rec.file) + file := "" + if rec.file != 0 { + var ok bool + if file, ok = fileCache[rec.file]; !ok { + file = funcInfoPackedFile(rec.file) + fileCache[rec.file] = file + } + } if file == "" { - file = funcInfoJoinFile(fn.fileRoot, fn.fileName) + file = fc.file } line := int(rec.line) if line == 0 { - line = int(fn.line) - } - function := publicFunctionName(funcInfoJoinName(fn.namePkg, fn.nameName)) - if function == "" { - function = publicFunctionName(funcInfoJoinName(fn.symbolPkg, fn.symbolName)) + line = fc.line } frames = append(frames, runtimePCLineFrame{ pc: pc, entry: entry, - function: function, + function: fc.function, file: file, line: line, - startLine: int(fn.line), + startLine: fc.line, }) } sortRuntimePCLineFrames(frames) @@ -1339,11 +1521,13 @@ func 
buildRuntimePCLineIndex(frames []runtimePCLineFrame) runtimePCFindIndex { if subIdx > len(frames)-1 { subIdx = len(frames) - 1 } + // delta counts deduplicated PCs inside one bucket, so it is + // bounded by the bucket size and always fits in uint16. delta := subIdx - baseIdx - if delta < 0 || delta > 255 { + if delta < 0 || delta > 0xffff { return runtimePCFindIndex{} } - buckets[b].subbuckets[s] = uint8(delta) + buckets[b].subbuckets[s] = uint16(delta) } } return runtimePCFindIndex{base: base, buckets: buckets} From 7976022990a7e6a12d238dc2fdc238d668a2481d Mon Sep 17 00:00:00 2001 From: Li Jie Date: Thu, 2 Jul 2026 15:33:46 +0800 Subject: [PATCH 27/59] runtime: memoize synthetic PCs in Caller/Callers hot paths Every Caller/Callers capture used to intern the frame into the synthetic table: a hash probe plus a full frame comparison per stack slot per call. Memoize the interned PC base in the shadow-stack slot and invalidate it when the recorded line changes (for one entry the instrumented name/file operands are constants, so the line is the only thing that varies between call sites). The three static frames emitted around every Callers walk get per-store memo slots, and the emit loop is unrolled so nothing escapes and skipped frames are never captured. macOS: hot.CallersOnly 182ns -> 125ns (Go 1.26: 118ns); with LTO 96ns. hot.CallersFramesFirst 528ns -> 471ns, 354ns with LTO (Go: 401ns). Co-Authored-By: Claude Fable 5 --- runtime/internal/runtime/caller.go | 91 +++++++++++++++++++++++++----- 1 file changed, 77 insertions(+), 14 deletions(-) diff --git a/runtime/internal/runtime/caller.go b/runtime/internal/runtime/caller.go index e4cbd3cf03..341158e825 100644 --- a/runtime/internal/runtime/caller.go +++ b/runtime/internal/runtime/caller.go @@ -30,6 +30,12 @@ type CallerFrame struct { File string Line int StartLine int + // captured memoizes the interned synthetic PC base (seq << 2) for this + // exact frame content. 
It is cleared whenever the frame's line info + // changes, so repeated Caller/Callers walks over an unchanged stack skip + // the intern hash probe entirely. Only meaningful inside shadow-stack + // slots; ignored by frame comparison and hashing. + captured uintptr } const callerLocationLimit = 4096 @@ -46,6 +52,11 @@ type callerLocationStore struct { stack []CallerFrame synthetic []CallerFrame syntheticHash []uintptr + // Memoized synthetic PC bases for the static frames emitted around every + // Callers walk. Per-store because synthetic sequences are per-store. + callersPCBase uintptr + mainPCBase uintptr + goexitPCBase uintptr } var callerLocationTLS = tls.Alloc[*callerLocationStore](nil) @@ -106,7 +117,15 @@ func updateCurrentFrame(entry uintptr, name, file string, line int) { if frame.Entry == entry { frame.Function = name frame.File = file - frame.Line = line + // For one entry the instrumented name/file operands are + // constants; only the line changes between call sites. Comparing + // just the line keeps this per-call path free of string + // comparisons while still invalidating the capture memo whenever + // the frame content can differ. + if frame.Line != line { + frame.Line = line + frame.captured = 0 + } return } } @@ -148,7 +167,7 @@ func Caller(skip int) (CallerFrame, bool) { return CallerFrame{}, false } if skip < len(store.stack) { - return store.captureFrame(store.stack[len(store.stack)-1-skip], callerPCValue), true + return store.captureFrameAt(&store.stack[len(store.stack)-1-skip], callerPCValue), true } switch skip - len(store.stack) { case 0: @@ -171,29 +190,39 @@ func Callers(skip int, pcs []uintptr) int { if store == nil || len(store.stack) == 0 { return 0 } + // Unrolled emit sequence: no closure so nothing escapes, and frames the + // skip count drops are never captured at all. 
n := 0 - add := func(frame CallerFrame) bool { + if skip > 0 { + skip-- + } else { + pcs[n] = store.staticPC(runtimeCallersFrame, &store.callersPCBase, callersPCValue) + n++ + } + for i := len(store.stack) - 1; i >= 0; i-- { if skip > 0 { skip-- - return true + continue } if n >= len(pcs) { - return false + return n } - pcs[n] = store.captureFrame(frame, callersPCValue).PC + pcs[n] = store.capturePC(&store.stack[i], callersPCValue) n++ - return true - } - if !add(runtimeCallersFrame) { - return n } - for i := len(store.stack) - 1; i >= 0; i-- { - if !add(store.stack[i]) { + if skip > 0 { + skip-- + } else { + if n >= len(pcs) { return n } + pcs[n] = store.staticPC(runtimeMainFrame, &store.mainPCBase, callersPCValue) + n++ + } + if skip <= 0 && n < len(pcs) { + pcs[n] = store.staticPC(runtimeGoexitFrame, &store.goexitPCBase, callersPCValue) + n++ } - _ = add(runtimeMainFrame) - _ = add(runtimeGoexitFrame) return n } @@ -321,7 +350,41 @@ func (s *callerLocationStore) captureFrame(frame CallerFrame, pcValue uintptr) C return rec } +// capturePC returns the synthetic PC for a shadow-stack slot, memoizing the +// interned base in the slot so an unchanged frame costs two loads instead of +// a hash probe plus frame comparison. +func (s *callerLocationStore) capturePC(frame *CallerFrame, pcValue uintptr) uintptr { + if frame.captured != 0 { + return frame.captured | pcValue + } + idx := s.internSyntheticFrame(*frame) + base := uintptr(idx+1) << 2 + frame.captured = base + return base | pcValue +} + +// captureFrameAt is capturePC plus the full frame copy Caller needs. +func (s *callerLocationStore) captureFrameAt(frame *CallerFrame, pcValue uintptr) CallerFrame { + pc := s.capturePC(frame, pcValue) + rec := s.synthetic[(pc>>2)-1] + rec.PC = pc + if rec.Entry == 0 { + rec.Entry = rec.PC + } + return rec +} + +// staticPC memoizes the synthetic PC base of a process-static frame (e.g. +// runtime.main) in the per-store cache slot. 
+func (s *callerLocationStore) staticPC(frame CallerFrame, cache *uintptr, pcValue uintptr) uintptr { + if *cache == 0 { + *cache = uintptr(s.internSyntheticFrame(frame)+1) << 2 + } + return *cache | pcValue +} + func (s *callerLocationStore) internSyntheticFrame(frame CallerFrame) int { + frame.captured = 0 if len(s.syntheticHash) == 0 { s.syntheticHash = make([]uintptr, callerPCHashInit) } From 3912534e099e1e0441e65224c20b0916ede37762 Mon Sep 17 00:00:00 2001 From: Li Jie Date: Thu, 2 Jul 2026 16:08:23 +0800 Subject: [PATCH 28/59] runtime: reuse FuncForPC cache in Frames.Next; document LTO inline-copy limit Frames.Next allocated a fresh *Func per symbolized frame; route it through the FuncForPC 4-way cache so repeated CallersFrames walks over the same PCs stop allocating. hot.CallersFramesFirst: macOS 471->456ns (338ns with LTO, Go 1.26: 406ns); Linux LTO reaches parity at 433ns. Also document a pre-existing limitation at the entry-site emitter: the body-embedded inline-asm record is duplicated by LTO inlining into every inline site (~4x section growth on multipkg) and registers host-function PCs under the inlinee's symbol ID. Runtime only consults the table when native symbolization fails, which bounds the impact; the fix (data globals with !associated metadata) needs LLVMGlobalSetMetadata in the llvm binding and lands with the link-phase ftab work. 
Co-Authored-By: Claude Fable 5 --- internal/build/funcinfo_table.go | 19 +++++++++++---- .../lib/runtime/pprof_runtime_stub_llgo.go | 24 +++++++++++++++++++ runtime/internal/lib/runtime/symtab.go | 8 +------ 3 files changed, 39 insertions(+), 12 deletions(-) diff --git a/internal/build/funcinfo_table.go b/internal/build/funcinfo_table.go index b6f3857ce0..33838e5fa1 100644 --- a/internal/build/funcinfo_table.go +++ b/internal/build/funcinfo_table.go @@ -721,11 +721,20 @@ func emitFuncInfoEntrySites(ctx *context, pkg llssa.Package) { } // This is LLGo's DCE-safe substitute for the function PC list that Go's // linker has while building pclntab. The inline-asm fragment lives in a - // section tied to the function body (SHF_LINK_ORDER on ELF; on Mach-O the - // record is removed with the function by IR-level global DCE), so dead - // functions do not leave stale entry records behind. Runtime still sorts - // these final PCs before building the Go-style findfunc bucket index, - // because LLVM IR generation does not know final linked text order. + // section tied to the function body (SHF_LINK_ORDER on ELF; live_support + // on Mach-O), so dead functions do not leave stale entry records behind. + // Runtime still sorts these final PCs before building the Go-style + // findfunc bucket index, because LLVM IR generation does not know final + // linked text order. + // + // Known limitation: because the record is emitted inside the function + // body, LTO inlining duplicates it into every inline site, bloating the + // section (~4x on multipkg) and registering host-function PCs under the + // inlinee's symbol ID; the runtime only consults this table when native + // symbolization fails, which bounds the impact. The fix is to emit the + // records as data globals carrying the function address with !associated + // metadata instead of body-embedded asm — that needs LLVMGlobalSetMetadata + // in the llvm binding and lands with the link-phase ftab work. 
machO := shouldEmitRuntimeMachOSites(ctx) llvmCtx := mod.Context() builder := llvmCtx.NewBuilder() diff --git a/runtime/internal/lib/runtime/pprof_runtime_stub_llgo.go b/runtime/internal/lib/runtime/pprof_runtime_stub_llgo.go index c8528de635..b52e372268 100644 --- a/runtime/internal/lib/runtime/pprof_runtime_stub_llgo.go +++ b/runtime/internal/lib/runtime/pprof_runtime_stub_llgo.go @@ -188,6 +188,30 @@ func newFuncForPC(pc uintptr, sym pcSymbol) *Func { } } +// frameFuncForPC returns the *Func for a frame PC that Frames.Next already +// symbolized, going through the FuncForPC cache so repeated CallersFrames +// walks over the same PCs stop allocating a Func per frame. +func frameFuncForPC(pc uintptr, sym pcSymbol, name string) *Func { + if fn := funcForPCLast.fn; fn != nil && funcForPCLast.pc == pc { + return fn + } + set := &funcForPCCache[funcForPCCacheIndex(pc)] + for i := 0; i < funcForPCCacheWays; i++ { + if fn := set[i].fn; fn != nil && set[i].pc == pc { + return fn + } + } + fn := &Func{ + entry: sym.entry, + name: name, + pc: pc, + file: sym.file, + line: sym.line, + } + cacheFuncForPC(pc, fn) + return fn +} + func cacheFuncForPC(pc uintptr, fn *Func) { setIndex := funcForPCCacheIndex(pc) set := &funcForPCCache[setIndex] diff --git a/runtime/internal/lib/runtime/symtab.go b/runtime/internal/lib/runtime/symtab.go index 2ed6d3f9ab..e65c0e9843 100644 --- a/runtime/internal/lib/runtime/symtab.go +++ b/runtime/internal/lib/runtime/symtab.go @@ -1744,13 +1744,7 @@ func (ci *Frames) Next() (frame Frame, more bool) { } var f *Func if sym.entry != 0 || fn != "" { - f = &Func{ - entry: sym.entry, - name: fn, - pc: pc, - file: sym.file, - line: sym.line, - } + f = frameFuncForPC(pc, sym, fn) } ci.frames = append(ci.frames, Frame{ PC: pc, From 878bce81a2acfe697cdb42333888c60264d7d375 Mon Sep 17 00:00:00 2001 From: Li Jie Date: Thu, 2 Jul 2026 16:16:26 +0800 Subject: [PATCH 29/59] build: document why body-embedded site records resist LTO dedup Record the experiment 
results at the emitter: !associated only guides linker GC and IR-level GlobalDCE deletes the records; llvm.compiler.used pins dead functions through the records' address initializers; and noduplicate blocks inlining. Section dedup is link-phase work. Co-Authored-By: Claude Fable 5 --- internal/build/funcinfo_table.go | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/internal/build/funcinfo_table.go b/internal/build/funcinfo_table.go index 33838e5fa1..6ff78d5a8b 100644 --- a/internal/build/funcinfo_table.go +++ b/internal/build/funcinfo_table.go @@ -731,10 +731,13 @@ func emitFuncInfoEntrySites(ctx *context, pkg llssa.Package) { // body, LTO inlining duplicates it into every inline site, bloating the // section (~4x on multipkg) and registering host-function PCs under the // inlinee's symbol ID; the runtime only consults this table when native - // symbolization fails, which bounds the impact. The fix is to emit the - // records as data globals carrying the function address with !associated - // metadata instead of body-embedded asm — that needs LLVMGlobalSetMetadata - // in the llvm binding and lands with the link-phase ftab work. + // symbolization fails, which bounds the impact. Data-global alternatives + // were tried and do not work with the current LLVM semantics: !associated + // affects only linker GC, so IR-level GlobalDCE deletes every record; + // keeping records via llvm.compiler.used makes their function-address + // initializers pin dead functions alive; and noduplicate on the asm call + // blocks inlining outright. Deduplicating the section is therefore + // link-phase work and lands together with the final ftab generation. 
machO := shouldEmitRuntimeMachOSites(ctx) llvmCtx := mod.Context() builder := llvmCtx.NewBuilder() From bdad60eb6c2a1f4c0a0bea0fc316593eab1c95b4 Mon Sep 17 00:00:00 2001 From: Li Jie Date: Thu, 2 Jul 2026 16:58:35 +0800 Subject: [PATCH 30/59] doc: design for link-phase ftab/findfunctab generation Post-link table generation plan: parse the linked binary's metadata sections, dedup LTO inline copies against the symbol table, sort with a sentinel, build Go-layout findfunctab via internal/pclntab, and write back into a reserved section with ASLR-safe anchor offsets. Runtime adopts the prebuilt table when the header validates and keeps first-use construction as fallback. Includes the list of platform facts established in #2012 so implementation does not re-derive them. Co-Authored-By: Claude Fable 5 --- doc/design/pclntab-linkphase.md | 119 ++++++++++++++++++++++++++++++++ 1 file changed, 119 insertions(+) create mode 100644 doc/design/pclntab-linkphase.md diff --git a/doc/design/pclntab-linkphase.md b/doc/design/pclntab-linkphase.md new file mode 100644 index 0000000000..eb35f77d2a --- /dev/null +++ b/doc/design/pclntab-linkphase.md @@ -0,0 +1,119 @@ +# Link-phase ftab/findfunctab generation + +Status: design + staged plan. Depends on #2012 (runtime funcinfo find index) +and benefits from #2015 (nanosecond monotonic clock, for honest benchmarks). + +## Problem + +#2012 builds the sorted function-entry table and the Go-style findfunctab at +**first use in the running process**, because LLVM IR generation does not know +final linked text order. This leaves four measured gaps against Go 1.26: + +1. `cold.FirstFuncForPC`: 36µs on macOS / 12µs on Linux vs Go's 2.4µs / 375ns. + The cold fast path (bounded linear scan of raw entry sections, then dladdr) + is a transitional mechanism; Go needs none of it because the linker ships a + sorted table. +2. 
LTO inlining duplicates the body-embedded entry-site inline asm into every + inline site: `llgo_funcinfo_entry` grew ~4x on the multipkg benchmark and + host-function PCs get registered under the inlinee's symbol ID. IR-level + fixes were tried and ruled out (see Facts below); dedup must happen after + final code generation. +3. The runtime keeps ~300 lines of transitional complexity: cold lookup + budget, section scans, first-use sort, entry-PC slack matching. +4. pcvalue-style instruction-level line tables (the next alignment step with + Go) need a per-function table keyed by final text order. + +## Approach: post-link table generation + +Insert a post-link step into `internal/build` after the final clang/lld link: + +``` +link -> post-link tool: parse binary -> sort/dedup -> build buckets -> write back +``` + +A separate linker plugin was considered and rejected: llgo drives stock +clang/lld and a plugin would need to be maintained per linker flavor +(ld64.lld, ld.lld) and per LTO mode. Editing the linked artifact is +linker-agnostic. + +### Data flow + +1. **Parse** the linked binary's metadata sections (`debug/elf`, + `debug/macho` from the Go stdlib — the tool runs on the host): + - `llgo_funcinfo_entry` / `__DATA,__llgo_fie`: `{pc, symbolID}` records. + - `llgo_funcinfo_stubsite` / `__DATA,__llgo_stub`: same layout. + - Zero records are skipped, as in the runtime today. +2. **Dedup by symbolID**: LTO inline copies register the same symbolID at + several PCs. The true entry is the record whose PC lies inside the text + range of the symbol that owns the symbolID; resolve via the binary's + symbol table (`.symtab` / `nlist`). Records that fall inside a different + function's range are inline copies — drop them. This is the fix for gap 2 + that IR-level metadata could not express. +3. **Sort** by PC; append a sentinel entry (end of text) so the runtime can + use Go's forward-scan lookup shape (`internal/pclntab.LookupFuncIndex`). +4. 
**Build buckets** with `internal/pclntab.BuildFindFuncBuckets` — the + faithful port of `cmd/link`'s algorithm that has been sitting unwired + since #2012. Delta overflow is a hard error here, mirroring Go's linker; + if it ever fires, fall back to leaving the prebuilt table absent. +5. **Write back** into a reserved section: + - The main module already emits `__llgo_funcinfo_*` globals; add a + `__llgo_pclntab_prebuilt` global sized from the collected package data + (entry-record count is known at main-module emission time; LTO can only + shrink it after dedup) plus a header {magic, version, count, anchorOff}. + - The tool rewrites the section contents in place (same size or smaller; + unused tail is zeroed) and flips the header magic to "valid". + +### ASLR + +Stored PCs must survive load-time slide. Store **offsets relative to an +anchor symbol** (`__llgo_pclntab_anchor`, placed in the same section). At +startup the runtime computes `slide = &anchor_runtime - anchorOff_stored` +and adds it during lookup (one add on the hot path, same as Go's +`datap.text` bias). Note the entry-site records themselves are already +rebased by the loader (they hold absolute pointers with relocations); the +prebuilt table deliberately holds offsets so the tool does not need to +emit relocations. + +### Runtime integration + +`initRuntimeFuncPCFramesOnce` gains a fast path: if the prebuilt header is +valid, adopt the table directly (no section scan, no sort, no bucket build) +— `FirstFuncForPC` becomes bucket-lookup cost, matching Go's shape. The +existing first-use construction remains as the fallback whenever the header +is invalid (older compilers, exotic formats, overflow bail-out), so the +change is strictly additive and safe to land incrementally. + +## Staging + +- **P1** `chore/pclnpost`: standalone tool, parse + dedup + sort + bucket + build + stats printing; golden tests against binaries produced by the + existing test programs. No behavior change. 
+- **P2** Reserve the section in `internal/build`, run the tool as a post-link + step, wire the runtime fast path. Benchmarks: cold.FirstFuncForPC on both + platforms; assert `llgo funcinfo: ... entries=N prebuilt` via + LLGO_FUNCINFO_DEBUG. +- **P3** Remove transitional runtime code (cold budget/scan). The first-use sort + path stays as fallback, but slack matching can go once anchors are exact + entries from the symbol table. +- **P4** pcvalue-style line tables keyed by the prebuilt function order + (replaces the call-site pcline records; gives instruction-level FileLine). + +## Established facts (verified in #2012 work; do not re-derive) + +- Mach-O metadata sections need `live_support` + one lowercase-`l` + linker-private symbol per record; ld64/lld `-dead_strip` then drops records + exactly with their function. Verified with lld 19.1.7, including LTO. +- Boundary symbols: ELF `__start_/__stop_`; Mach-O `section$start$SEG$SECT` + referenced from IR needs the `\x01` verbatim-name prefix or LLVM prepends + an underscore and the linker stops recognizing it. +- Visible (non-`L`) labels inside Mach-O function bodies split the function + into atoms that the linker may reorder — assembler-local labels only. +- `!associated` affects only linker GC; IR-level GlobalDCE deletes such + globals regardless, and `llvm.compiler.used` pins dead functions through + the records' initializers. This is why records stay body-embedded inline + asm and dedup happens post-link. +- `internal/pclntab` is a faithful port of Go 1.26's findfunctab generation + and lookup (uint8 deltas, overflow error, forward scan, sentinel); the + runtime's in-process variant deliberately uses uint16 deltas because LLGo + lacks Go's MINFUNC guarantee. The post-link table can use the faithful + uint8 layout since dedup restores the one-record-per-function invariant.
From 23ce12bd38ec1fadc2f18cfec3f5476b3cf7be7c Mon Sep 17 00:00:00 2001 From: Li Jie Date: Thu, 2 Jul 2026 16:42:28 +0800 Subject: [PATCH 31/59] runtime: nanosecond monotonic clock on darwin and linux The monotonic time source had two problems: - On Linux, runtimeNano passed clite's CLOCK_MONOTONIC, whose value is Darwin's clock id (6). Linux interprets 6 as CLOCK_MONOTONIC_COARSE, a millisecond-granularity clock: consecutive time.Now() readings were identical 100% of the time and the smallest nonzero delta was 1ms. - On Darwin, clock_gettime(CLOCK_MONOTONIC) itself only has microsecond granularity (96% identical consecutive readings, 1us minimum delta). Mirror Go's runtime structure with a per-OS nanotime1 in the runtime package itself, keeping the hot path free of clite indirection and clite unchanged: Darwin reads CLOCK_UPTIME_RAW through clock_gettime_nsec_np (the same clock Go's nanotime uses there), Linux uses clock_gettime with the OS-correct CLOCK_MONOTONIC id as a local constant, and remaining platforms keep the previous behavior. Measured with consecutive time.Now() deltas (min nonzero / zero-frac): - macOS arm64: 1us / 96.5% -> 41ns / 26% (Go 1.26: 41ns / 22%) - Linux arm64: 1ms / 100% -> 41ns / 21% time.Sleep, Timer and Ticker behave identically before and after. 
Co-Authored-By: Claude Fable 5 --- .../lib/runtime/nanotime_darwin_llgo.go | 36 +++++++++++++++++ .../lib/runtime/nanotime_linux_llgo.go | 40 +++++++++++++++++++ .../lib/runtime/nanotime_other_llgo.go | 33 +++++++++++++++ runtime/internal/lib/runtime/time_llgo.go | 4 +- .../internal/lib/runtime/time_llgo_go123.go | 4 +- 5 files changed, 111 insertions(+), 6 deletions(-) create mode 100644 runtime/internal/lib/runtime/nanotime_darwin_llgo.go create mode 100644 runtime/internal/lib/runtime/nanotime_linux_llgo.go create mode 100644 runtime/internal/lib/runtime/nanotime_other_llgo.go diff --git a/runtime/internal/lib/runtime/nanotime_darwin_llgo.go b/runtime/internal/lib/runtime/nanotime_darwin_llgo.go new file mode 100644 index 0000000000..7d487edadf --- /dev/null +++ b/runtime/internal/lib/runtime/nanotime_darwin_llgo.go @@ -0,0 +1,36 @@ +//go:build darwin && !baremetal + +/* + * Copyright (c) 2026 The XGo Authors (xgo.dev). All rights reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package runtime + +import ( + _ "unsafe" +) + +// Mirrors Go's runtime.nanotime1 on Darwin (sys_darwin.go): read +// CLOCK_UPTIME_RAW through clock_gettime_nsec_np. Darwin serves +// clock_gettime(CLOCK_MONOTONIC) with only microsecond granularity, while +// CLOCK_UPTIME_RAW is mach_absolute_time with full nanosecond resolution. 
+const _CLOCK_UPTIME_RAW = 8 + +//go:linkname c_clock_gettime_nsec_np C.clock_gettime_nsec_np +func c_clock_gettime_nsec_np(clockID int32) uint64 + +func nanotime1() int64 { + return int64(c_clock_gettime_nsec_np(_CLOCK_UPTIME_RAW)) +} diff --git a/runtime/internal/lib/runtime/nanotime_linux_llgo.go b/runtime/internal/lib/runtime/nanotime_linux_llgo.go new file mode 100644 index 0000000000..a07def21eb --- /dev/null +++ b/runtime/internal/lib/runtime/nanotime_linux_llgo.go @@ -0,0 +1,40 @@ +//go:build linux && !baremetal + +/* + * Copyright (c) 2026 The XGo Authors (xgo.dev). All rights reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package runtime + +import ( + "unsafe" + + c "github.com/goplus/llgo/runtime/internal/clite" + ct "github.com/goplus/llgo/runtime/internal/clite/time" +) + +// Linux CLOCK_MONOTONIC (see linux/time.h), which has nanosecond +// resolution. Deliberately a local constant: ct.CLOCK_MONOTONIC carries +// Darwin's id (6), which Linux interprets as CLOCK_MONOTONIC_COARSE — a +// millisecond-granularity clock that quantized every monotonic timestamp +// the runtime produced. +const _CLOCK_MONOTONIC = 1 + +// nanotime1 mirrors Go's runtime.nanotime1 on Linux.
+func nanotime1() int64 { + tv := (*ct.Timespec)(c.Alloca(unsafe.Sizeof(ct.Timespec{}))) + ct.ClockGettime(ct.ClockidT(_CLOCK_MONOTONIC), tv) + return int64(tv.Sec)*1e9 + int64(tv.Nsec) +} diff --git a/runtime/internal/lib/runtime/nanotime_other_llgo.go b/runtime/internal/lib/runtime/nanotime_other_llgo.go new file mode 100644 index 0000000000..8478b3ec8f --- /dev/null +++ b/runtime/internal/lib/runtime/nanotime_other_llgo.go @@ -0,0 +1,33 @@ +//go:build !darwin && !linux && !baremetal + +/* + * Copyright (c) 2026 The XGo Authors (xgo.dev). All rights reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package runtime + +import ( + "unsafe" + + c "github.com/goplus/llgo/runtime/internal/clite" + ct "github.com/goplus/llgo/runtime/internal/clite/time" +) + +// nanotime1 keeps the previous behavior on remaining platforms. 
+func nanotime1() int64 { + tv := (*ct.Timespec)(c.Alloca(unsafe.Sizeof(ct.Timespec{}))) + ct.ClockGettime(ct.CLOCK_MONOTONIC, tv) + return int64(tv.Sec)*1e9 + int64(tv.Nsec) +} diff --git a/runtime/internal/lib/runtime/time_llgo.go b/runtime/internal/lib/runtime/time_llgo.go index 0a09c98e07..3f60f7d348 100644 --- a/runtime/internal/lib/runtime/time_llgo.go +++ b/runtime/internal/lib/runtime/time_llgo.go @@ -547,7 +547,5 @@ func timeSleepWake(arg any, _ uintptr) { } func runtimeNano() int64 { - tv := (*ct.Timespec)(c.Alloca(unsafe.Sizeof(ct.Timespec{}))) - ct.ClockGettime(ct.CLOCK_MONOTONIC, tv) - return int64(tv.Sec)*1e9 + int64(tv.Nsec) + return nanotime1() } diff --git a/runtime/internal/lib/runtime/time_llgo_go123.go b/runtime/internal/lib/runtime/time_llgo_go123.go index ceb2d73ed4..65bbff2f4d 100644 --- a/runtime/internal/lib/runtime/time_llgo_go123.go +++ b/runtime/internal/lib/runtime/time_llgo_go123.go @@ -581,7 +581,5 @@ func resetTimer(t *timeTimer, when, period int64) bool { } func runtimeNano() int64 { - tv := (*ct.Timespec)(c.Alloca(unsafe.Sizeof(ct.Timespec{}))) - ct.ClockGettime(ct.CLOCK_MONOTONIC, tv) - return int64(tv.Sec)*1e9 + int64(tv.Nsec) + return nanotime1() } From 35b77053598c9d0b33c412ba0975d39e86228ec7 Mon Sep 17 00:00:00 2001 From: Li Jie Date: Thu, 2 Jul 2026 17:28:34 +0800 Subject: [PATCH 32/59] build: disable funcinfo metadata emission in debug builds The macOS CI LLDB step caught the funcinfo entry/stub site anchors shifting instruction/scope layout: with the records emitted at function entry, LLDB reported variables from an inner lexical block (ScopeIf's b, c) as in scope before the block began. Debug builds carry full DWARF, so the funcinfo tables are redundant there; gate the metadata pipeline on !IsDbgEnabled(). Caller-frame instrumentation is independent of this switch, so runtime.Caller keeps working in debug builds. _lldb/runtest.sh: 194/194 pass. 
This also covers Linux, where the same interference existed since the sites were introduced but the LLDB suite only runs on the macOS jobs. Co-Authored-By: Claude Fable 5 --- internal/build/build.go | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/internal/build/build.go b/internal/build/build.go index f5a7c87f8a..2832ab1133 100644 --- a/internal/build/build.go +++ b/internal/build/build.go @@ -318,7 +318,13 @@ func Do(args []string, conf *Config) ([]Package, error) { prog := llssa.NewProgram(target) prog.EnableGoGlobalDCE(conf.goGlobalDCEEnabled()) - prog.EnableFuncInfoMetadata(conf.Mode != ModeGen && IsFuncInfoEnabled()) + // Debug builds carry full DWARF, so the funcinfo metadata tables are + // redundant there — and the site records' body-embedded anchors shift + // instruction/scope layout enough to confuse debuggers (LLDB reported + // variables from an inner lexical block as in scope before the block + // began). Caller-frame instrumentation is independent of this switch, so + // runtime.Caller keeps working in debug builds. + prog.EnableFuncInfoMetadata(conf.Mode != ModeGen && IsFuncInfoEnabled() && !IsDbgEnabled()) sizes := func(sizes types.Sizes, compiler, arch string) types.Sizes { if arch == "wasm" { sizes = &types.StdSizes{WordSize: 4, MaxAlign: 4} From b04b0a92f7f7c1aea5310ac76362bd6e24d45bfa Mon Sep 17 00:00:00 2001 From: Li Jie Date: Thu, 2 Jul 2026 17:56:54 +0800 Subject: [PATCH 33/59] build,ssa,cl: gate only site records off in debug builds MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Refine the previous commit: instead of disabling the whole funcinfo metadata pipeline under LLGO_DEBUG/LLGO_DEBUG_SYMBOLS, add a separate Program.EnableFuncInfoSites switch and turn off just the body-embedded site records (entry/stub anchors and pc-line labels) — they are what shifts instruction/scope layout and confused LLDB. 
The funcinfo tables are plain data globals and stay enabled, so runtime.FuncForPC keeps its normalized name and Func.FileLine keeps file/line in debug builds (via the dlsym fallback path); runtime.Caller/Callers were never affected because caller-frame instrumentation is independent of both switches. Debug builds lose only the section fast paths (first-use latency) and statement-level pc-line granularity, both redundant next to full DWARF. _lldb/runtest.sh: 194/194; cl and test/go suites pass. Co-Authored-By: Claude Fable 5 --- cl/caller_frame_test.go | 6 ++++++ cl/compile.go | 2 +- cl/funcinfo_metadata_test.go | 1 + cl/instr.go | 2 +- internal/build/build.go | 17 ++++++++++------- internal/build/funcinfo_table.go | 7 ++++++- internal/build/funcinfo_table_test.go | 4 ++++ ssa/funcinfo.go | 14 ++++++++++++++ ssa/package.go | 1 + ssa/ssa_test.go | 1 + 10 files changed, 45 insertions(+), 10 deletions(-) diff --git a/cl/caller_frame_test.go b/cl/caller_frame_test.go index ec252c14fb..017b8d3473 100644 --- a/cl/caller_frame_test.go +++ b/cl/caller_frame_test.go @@ -524,6 +524,7 @@ func leaf() {} prog.Target().GOOS = "linux" prog.Target().GOARCH = "amd64" prog.EnableFuncInfoMetadata(true) + prog.EnableFuncInfoSites(true) pkg, err := NewPackage(prog, ssapkg, files) if err != nil { t.Fatal(err) @@ -561,6 +562,7 @@ func top() { `) prog := newLLSSAProgForTarget(t, &llssa.Target{GOOS: "linux", GOARCH: "386"}) prog.EnableFuncInfoMetadata(true) + prog.EnableFuncInfoSites(true) pkg, err := NewPackage(prog, ssapkg, files) if err != nil { t.Fatal(err) @@ -590,6 +592,7 @@ func top() { prog.Target().GOOS = "linux" prog.Target().GOARCH = "amd64" prog.EnableFuncInfoMetadata(true) + prog.EnableFuncInfoSites(true) pkg, err := NewPackage(prog, ssapkg, files) if err != nil { t.Fatal(err) @@ -618,6 +621,7 @@ func top() { `) prog := newLLSSAProgForTarget(t, &llssa.Target{GOOS: "linux", GOARCH: "amd64"}) prog.EnableFuncInfoMetadata(true) + prog.EnableFuncInfoSites(true) pkg := 
prog.NewPackage("foo", "example.com/foo") fn := pkg.NewFunc("example.com/foo.top", llssa.NoArgsNoRet, llssa.InGo) ctx := &context{ @@ -664,6 +668,7 @@ func top() { prog.Target().GOOS = "darwin" prog.Target().GOARCH = "arm64" prog.EnableFuncInfoMetadata(true) + prog.EnableFuncInfoSites(true) pkg, err := NewPackage(prog, ssapkg, files) if err != nil { t.Fatal(err) @@ -745,6 +750,7 @@ func f() { _ = runtime.FuncForPC(0) } prog.Target().GOOS = "linux" prog.Target().GOARCH = "amd64" prog.EnableFuncInfoMetadata(true) + prog.EnableFuncInfoSites(true) pkg, err = NewPackage(prog, ssapkg, files) if err != nil { t.Fatal(err) diff --git a/cl/compile.go b/cl/compile.go index ff3f31689f..3cd538dd80 100644 --- a/cl/compile.go +++ b/cl/compile.go @@ -683,7 +683,7 @@ func needsRuntimeStackNoInline(pkg *types.Package, f *ssa.Function) bool { } func (p *context) needsPCLineNoInline(f *ssa.Function) bool { - if p == nil || f == nil || !p.prog.FuncInfoMetadataEnabled() || !p.trackCallerFrames || !p.runtimeCallerFuncs[f] { + if p == nil || f == nil || !p.prog.FuncInfoSitesEnabled() || !p.trackCallerFrames || !p.runtimeCallerFuncs[f] { return false } if !canEmitPCLineLabelsForTarget(p.prog.Target()) { diff --git a/cl/funcinfo_metadata_test.go b/cl/funcinfo_metadata_test.go index 902813cfad..5af5800613 100644 --- a/cl/funcinfo_metadata_test.go +++ b/cl/funcinfo_metadata_test.go @@ -52,6 +52,7 @@ func (T) method() {} ` ir := cltest.CompileIREx(t, src, "foo.go", false, func(prog llssa.Program) { prog.EnableFuncInfoMetadata(true) + prog.EnableFuncInfoSites(true) }) for _, want := range []string{ diff --git a/cl/instr.go b/cl/instr.go index 8f8cbf240d..8530ff0ee5 100644 --- a/cl/instr.go +++ b/cl/instr.go @@ -1390,7 +1390,7 @@ func (p *context) recordCallerLocationForCall(b llssa.Builder, call *ssa.CallCom } func (p *context) emitPCLineLabel(b llssa.Builder, pos token.Pos) { - if p == nil || p.pkg == nil || p.fn == nil || !p.prog.FuncInfoMetadataEnabled() || !p.shouldTrackCallerFrames() { + 
if p == nil || p.pkg == nil || p.fn == nil || !p.prog.FuncInfoSitesEnabled() || !p.shouldTrackCallerFrames() { return } target := p.prog.Target() diff --git a/internal/build/build.go b/internal/build/build.go index 2832ab1133..d2e88a75c7 100644 --- a/internal/build/build.go +++ b/internal/build/build.go @@ -318,13 +318,16 @@ func Do(args []string, conf *Config) ([]Package, error) { prog := llssa.NewProgram(target) prog.EnableGoGlobalDCE(conf.goGlobalDCEEnabled()) - // Debug builds carry full DWARF, so the funcinfo metadata tables are - // redundant there — and the site records' body-embedded anchors shift - // instruction/scope layout enough to confuse debuggers (LLDB reported - // variables from an inner lexical block as in scope before the block - // began). Caller-frame instrumentation is independent of this switch, so - // runtime.Caller keeps working in debug builds. - prog.EnableFuncInfoMetadata(conf.Mode != ModeGen && IsFuncInfoEnabled() && !IsDbgEnabled()) + funcInfo := conf.Mode != ModeGen && IsFuncInfoEnabled() + prog.EnableFuncInfoMetadata(funcInfo) + // Site records are inline-asm fragments inside function bodies; their + // anchors shift instruction/scope layout enough to confuse debuggers + // (LLDB reported variables from an inner lexical block as in scope before + // the block began). Debug builds keep the metadata tables — FuncForPC + // name/FileLine fidelity survives via the dlsym path — but drop the + // sites. Caller-frame instrumentation is independent of both switches, + // so runtime.Caller keeps working in debug builds. 
+ prog.EnableFuncInfoSites(funcInfo && !IsDbgEnabled()) sizes := func(sizes types.Sizes, compiler, arch string) types.Sizes { if arch == "wasm" { sizes = &types.StdSizes{WordSize: 4, MaxAlign: 4} diff --git a/internal/build/funcinfo_table.go b/internal/build/funcinfo_table.go index 6ff78d5a8b..7bfbe6d636 100644 --- a/internal/build/funcinfo_table.go +++ b/internal/build/funcinfo_table.go @@ -617,8 +617,13 @@ func shouldEmitRuntimeMachOSites(ctx *context) bool { // associated sections (honored by --gc-sections). Mach-O uses live_support // sections: under ld64/lld -dead_strip a live_support atom survives only if // the atom it references (the anchor inside the function body) is live, which -// is the same records-follow-function semantics. +// is the same records-follow-function semantics. Sites are additionally +// gated per Program: debug builds keep the funcinfo tables but drop the +// body-embedded site records (see Program.EnableFuncInfoSites). func shouldEmitRuntimeSites(ctx *context) bool { + if ctx == nil || ctx.prog == nil || !ctx.prog.FuncInfoSitesEnabled() { + return false + } return shouldEmitRuntimeELFSites(ctx) || shouldEmitRuntimeMachOSites(ctx) } diff --git a/internal/build/funcinfo_table_test.go b/internal/build/funcinfo_table_test.go index adfab2849b..3afb37bdef 100644 --- a/internal/build/funcinfo_table_test.go +++ b/internal/build/funcinfo_table_test.go @@ -29,6 +29,7 @@ import ( func TestFuncInfoTableMaterializesMetadataWithoutFunctionPointers(t *testing.T) { prog := llssa.NewProgram(nil) + prog.EnableFuncInfoSites(true) src := prog.NewPackage("example.com/p", "example.com/p") src.EmitFuncInfo("example.com/p.live", "example.com/p.Live", "live.go", 17, 3) src.EmitFuncInfo("example.com/p.live", "example.com/p.LiveDuplicate", "dup.go", 19, 1) @@ -110,6 +111,7 @@ func TestFuncInfoTableMaterializesEntrySites(t *testing.T) { }, } prog.EnableFuncInfoMetadata(true) + prog.EnableFuncInfoSites(true) emitFuncInfoEntrySites(ctx, src) srcIR := src.String() 
for _, want := range []string{ @@ -190,6 +192,7 @@ func TestFuncInfoTableMaterializesClosureStubIndexes(t *testing.T) { }, } prog.EnableFuncInfoMetadata(true) + prog.EnableFuncInfoSites(true) emitFuncInfoStubSites(ctx, src) srcIR := src.String() for _, want := range []string{ @@ -266,6 +269,7 @@ func TestFuncInfoTableMaterializesClosureStubIndexes(t *testing.T) { func TestFuncInfoTableMaterializesPCLineMetadata(t *testing.T) { prog := llssa.NewProgram(nil) + prog.EnableFuncInfoSites(true) src := prog.NewPackage("example.com/p", "example.com/p") src.EmitFuncInfo("example.com/p.live", "example.com/p.Live", "live.go", 17, 3) src.EmitPCLineInfo(0x1234, "example.com/p.live", "call.go", 23, 5) diff --git a/ssa/funcinfo.go b/ssa/funcinfo.go index 4dbc8f08e1..7e3979a9be 100644 --- a/ssa/funcinfo.go +++ b/ssa/funcinfo.go @@ -37,6 +37,20 @@ func (p Program) FuncInfoMetadataEnabled() bool { return p.enableFuncInfoMetadata } +// EnableFuncInfoSites controls emission of the per-function site records +// (entry/stub/pc-line inline-asm fragments inside function bodies). They are +// gated separately from the funcinfo metadata tables because the +// body-embedded anchors shift instruction/scope layout enough to confuse +// debuggers; debug builds keep the tables (FuncForPC name/FileLine fidelity +// via the dlsym path) but drop the sites. +func (p Program) EnableFuncInfoSites(enable bool) { + p.enableFuncInfoSites = enable +} + +func (p Program) FuncInfoSitesEnabled() bool { + return p.enableFuncInfoSites +} + // EmitFuncInfo records a function's linker symbol, Go name, and declaration // source position as LLVM named metadata. 
The row layout is: // diff --git a/ssa/package.go b/ssa/package.go index f0f118d639..9826102218 100644 --- a/ssa/package.go +++ b/ssa/package.go @@ -235,6 +235,7 @@ type aProgram struct { enableGoGlobalDCE bool enableFuncInfoMetadata bool + enableFuncInfoSites bool } type AbiSymbol struct { diff --git a/ssa/ssa_test.go b/ssa/ssa_test.go index 3e81d24010..38160c8696 100644 --- a/ssa/ssa_test.go +++ b/ssa/ssa_test.go @@ -252,6 +252,7 @@ func testFuncInfoMetadataDoesNotPreserveFunctions(t *testing.T) { t.Fatal("funcinfo metadata should be disabled by default") } prog.EnableFuncInfoMetadata(true) + prog.EnableFuncInfoSites(true) if !prog.FuncInfoMetadataEnabled() { t.Fatal("funcinfo metadata should be enabled") } From ab7e17f5f1f5c3a0f7fa600965ed0a206238fa9b Mon Sep 17 00:00:00 2001 From: Li Jie Date: Thu, 2 Jul 2026 18:18:50 +0800 Subject: [PATCH 34/59] runtime: keep unresolved-entry frames out of the FuncForPC cache frameFuncForPC could cache a Func built from a pcline frame whose entry resolution failed (entry == 0); a later FuncForPC on the same PC would then observe Entry() == 0 where its own constructor falls back to pc. Co-Authored-By: Claude Fable 5 --- runtime/internal/lib/runtime/pprof_runtime_stub_llgo.go | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/runtime/internal/lib/runtime/pprof_runtime_stub_llgo.go b/runtime/internal/lib/runtime/pprof_runtime_stub_llgo.go index b52e372268..3da596f877 100644 --- a/runtime/internal/lib/runtime/pprof_runtime_stub_llgo.go +++ b/runtime/internal/lib/runtime/pprof_runtime_stub_llgo.go @@ -208,7 +208,12 @@ func frameFuncForPC(pc uintptr, sym pcSymbol, name string) *Func { file: sym.file, line: sym.line, } - cacheFuncForPC(pc, fn) + // FuncForPC's own constructor falls back to entry == pc; keep frames with + // an unresolved entry out of the shared cache so a later FuncForPC(pc) + // does not observe Entry() == 0. 
+ if sym.entry != 0 { + cacheFuncForPC(pc, fn) + } return fn } From 8e9c92102635e224c5658205a6f6fd7e097b669b Mon Sep 17 00:00:00 2001 From: Li Jie Date: Thu, 2 Jul 2026 18:47:54 +0800 Subject: [PATCH 35/59] build: add LLGO_FUNCINFO_SITES to toggle site records independently LLGO_FUNCINFO_SITES=0 keeps the funcinfo metadata tables but drops the body-embedded entry/stub/pc-line inline-asm sites. This is the narrow A/B needed to isolate codegen perturbation caused by the in-body asm anchors: with sites off, plain-code benchmarks match the no-funcinfo baseline within noise, while sites on shifts hot runtime-internal loops by -30%..+6% through inline/layout decisions. Semantics with sites off: FuncForPC(entry) and Func.FileLine(entry) keep working through the dlsym fallback path; statement/call-site granularity PC line lookup is disabled, and first-use table construction loses the section fast path. Tests assert the split: tables still materialize while entry/stub section asm, boundary symbols, and pc-line site labels are all absent. 
Co-Authored-By: Claude Fable 5 --- cl/caller_frame_test.go | 30 ++++++++++++++ internal/build/build.go | 11 +++++- internal/build/funcinfo_table_test.go | 57 +++++++++++++++++++++++++++ 3 files changed, 97 insertions(+), 1 deletion(-) diff --git a/cl/caller_frame_test.go b/cl/caller_frame_test.go index 017b8d3473..69d570d9ee 100644 --- a/cl/caller_frame_test.go +++ b/cl/caller_frame_test.go @@ -656,6 +656,36 @@ func top() { } } +func TestCompileRuntimeCallerPCLineMetadataSitesDisabled(t *testing.T) { + ssapkg, files := buildCallerFrameSSAPackage(t, "example.com/foo", `package foo +import "runtime" + +func top() { + runtime.Caller(0) +} +`) + prog := newLLSSAProg(t) + prog.Target().GOOS = "linux" + prog.Target().GOARCH = "amd64" + prog.EnableFuncInfoMetadata(true) + prog.EnableFuncInfoSites(false) + pkg, err := NewPackage(prog, ssapkg, files) + if err != nil { + t.Fatal(err) + } + ir := pkg.Module().String() + // Funcinfo metadata still flows... + if !strings.Contains(ir, llssa.FuncInfoMetadataName) { + t.Fatalf("sites disabled should keep funcinfo metadata:\n%s", ir) + } + // ...but no pc-line site labels are emitted. + for _, bad := range []string{"__llgo_pcsite_", ".pushsection llgo_pcline", "!llgo.pcline"} { + if strings.Contains(ir, bad) { + t.Fatalf("sites disabled should not emit pc-line sites, found %q:\n%s", bad, ir) + } + } +} + func TestCompileRuntimeCallerPCLineMetadataOnDarwin(t *testing.T) { ssapkg, files := buildCallerFrameSSAPackage(t, "example.com/foo", `package foo import "runtime" diff --git a/internal/build/build.go b/internal/build/build.go index d2e88a75c7..9af32042d7 100644 --- a/internal/build/build.go +++ b/internal/build/build.go @@ -327,7 +327,7 @@ func Do(args []string, conf *Config) ([]Package, error) { // name/FileLine fidelity survives via the dlsym path — but drop the // sites. Caller-frame instrumentation is independent of both switches, // so runtime.Caller keeps working in debug builds. 
- prog.EnableFuncInfoSites(funcInfo && !IsDbgEnabled()) + prog.EnableFuncInfoSites(funcInfo && !IsDbgEnabled() && IsFuncInfoSitesEnabled()) sizes := func(sizes types.Sizes, compiler, arch string) types.Sizes { if arch == "wasm" { sizes = &types.StdSizes{WordSize: 4, MaxAlign: 4} @@ -1832,6 +1832,7 @@ var ( const llgoDebug = "LLGO_DEBUG" const llgoDbgSyms = "LLGO_DEBUG_SYMBOLS" const llgoFuncInfo = "LLGO_FUNCINFO" +const llgoFuncInfoSites = "LLGO_FUNCINFO_SITES" const llgoTrace = "LLGO_TRACE" const llgoOptimize = "LLGO_OPTIMIZE" const llgoWasmRuntime = "LLGO_WASM_RUNTIME" @@ -1883,6 +1884,14 @@ func IsFuncInfoEnabled() bool { return isEnvOn(llgoFuncInfo, true) } +// IsFuncInfoSitesEnabled controls the body-embedded site records +// independently of the funcinfo tables (LLGO_FUNCINFO_SITES=0 keeps the +// metadata but drops entry/stub/pc-line inline-asm sites). Useful for +// isolating codegen perturbation caused by the in-body asm anchors. +func IsFuncInfoSitesEnabled() bool { + return isEnvOn(llgoFuncInfoSites, true) +} + func IsDbgSymsEnabled() bool { return isEnvOn(llgoDbgSyms, false) } diff --git a/internal/build/funcinfo_table_test.go b/internal/build/funcinfo_table_test.go index 3afb37bdef..1c330d3e3f 100644 --- a/internal/build/funcinfo_table_test.go +++ b/internal/build/funcinfo_table_test.go @@ -176,6 +176,63 @@ func TestFuncInfoTableMaterializesEntrySites(t *testing.T) { } } +func TestFuncInfoTableSitesDisabledKeepsTables(t *testing.T) { + prog := llssa.NewProgram(nil) + src := prog.NewPackage("example.com/p", "example.com/p") + src.EmitFuncInfo("example.com/p.live", "example.com/p.Live", "live.go", 17, 3) + src.EmitPCLineInfo(0x1234, "example.com/p.live", "call.go", 23, 5) + liveFn := src.NewFunc("example.com/p.live", llssa.NoArgsNoRet, llssa.InC) + liveFn.MakeBody(1).Return() + ctx := &context{ + prog: prog, + buildConf: &Config{ + BuildMode: BuildModeExe, + Goos: "linux", + Goarch: "amd64", + }, + } + prog.EnableFuncInfoMetadata(true) + 
prog.EnableFuncInfoSites(false) + + emitFuncInfoEntrySites(ctx, src) + emitFuncInfoStubSites(ctx, src) + srcIR := src.String() + for _, bad := range []string{"llgo_funcinfo_entry", "llgo_funcinfo_stubsite", "call void asm sideeffect"} { + if strings.Contains(srcIR, bad) { + t.Fatalf("sites disabled: package IR should not contain %q:\n%s", bad, srcIR) + } + } + + records := collectFuncInfo([]Package{{LPkg: src}}) + pcLines := collectPCLineInfo([]Package{{LPkg: src}}) + entry := genMainModule(ctx, llssa.PkgRuntime, &packages.Package{ + PkgPath: "example.com/main", + ExportFile: "main.a", + }, &genConfig{funcInfo: records, pcLineInfo: pcLines}) + ir := entry.LPkg.String() + // The metadata tables must still materialize... + for _, want := range []string{ + "@__llgo_funcinfo_table = global ptr", + "@__llgo_funcinfo_count = global", + "@__llgo_pcline_table = global ptr", + } { + if !strings.Contains(ir, want) { + t.Fatalf("sites disabled: funcinfo table IR missing %q:\n%s", want, ir) + } + } + // ...while the site sections and their boundary symbols must not. 
+ for _, bad := range []string{ + "@__start_llgo_funcinfo_entry", + "@__start_llgo_funcinfo_stubsite", + "@__start_llgo_pcline", + "module asm \".section llgo_", + } { + if strings.Contains(ir, bad) { + t.Fatalf("sites disabled: funcinfo table IR should not contain %q:\n%s", bad, ir) + } + } +} + func TestFuncInfoTableMaterializesClosureStubIndexes(t *testing.T) { prog := llssa.NewProgram(nil) src := prog.NewPackage("example.com/p", "example.com/p") From 82fed995d9151af410f965395e5a859fc86b530d Mon Sep 17 00:00:00 2001 From: Li Jie Date: Fri, 3 Jul 2026 09:47:43 +0800 Subject: [PATCH 36/59] ci: make patch coverage reflect product code; cover pclntab edge paths MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit codecov/patch was failing at 51.77% (target 88.68%), but the shortfall was almost entirely benchmark/runtime_funcinfo/main.go — a standalone measurement harness with no unit tests by design (600 of 639 missed lines). Compiler-side changes were already covered (cl/instr.go 478/493, cl/compile.go 125/127). Ignore benchmark/** in codecov and cover the remaining internal/pclntab validation/lookup edges directly (96.2%). Co-Authored-By: Claude Fable 5 --- codecov.yml | 5 +++ internal/pclntab/pclntab_test.go | 60 ++++++++++++++++++++++++++++++++ 2 files changed, 65 insertions(+) create mode 100644 codecov.yml diff --git a/codecov.yml b/codecov.yml new file mode 100644 index 0000000000..ee9a4cf210 --- /dev/null +++ b/codecov.yml @@ -0,0 +1,5 @@ +# Benchmarks are standalone measurement harnesses, not product code paths; +# they have no unit tests by design and would otherwise dominate patch +# coverage whenever they grow. 
+ignore: + - "benchmark/**" diff --git a/internal/pclntab/pclntab_test.go b/internal/pclntab/pclntab_test.go index 26903ebbea..58d798ee89 100644 --- a/internal/pclntab/pclntab_test.go +++ b/internal/pclntab/pclntab_test.go @@ -49,3 +49,63 @@ func TestBuildFindFuncBucketsRejectsOverflow(t *testing.T) { t.Fatal("expected subbucket overflow error") } } + +func TestBuildFindFuncBucketsValidation(t *testing.T) { + if bs, err := BuildFindFuncBuckets(nil, 0); err != nil || bs != nil { + t.Fatalf("textSize=0: got %v, %v", bs, err) + } + cases := []struct { + name string + ftab []FuncTabEntry + }{ + {"too short", []FuncTabEntry{{EntryOff: 0}}}, + {"not increasing", []FuncTabEntry{{EntryOff: 0}, {EntryOff: 8}, {EntryOff: 8}}}, + {"first not zero", []FuncTabEntry{{EntryOff: 4}, {EntryOff: 64}}}, + {"sentinel below text", []FuncTabEntry{{EntryOff: 0}, {EntryOff: 16}}}, + } + for _, c := range cases { + if _, err := BuildFindFuncBuckets(c.ftab, 64); err == nil { + t.Fatalf("%s: expected error", c.name) + } + } +} + +func TestFuncIndexForPCEdges(t *testing.T) { + ftab := []FuncTabEntry{{EntryOff: 0}, {EntryOff: 16}, {EntryOff: 64}} + if got := FuncIndexForPC(ftab, 0); got != 0 { + t.Fatalf("pc=0: %d", got) + } + if got := FuncIndexForPC(ftab, 63); got != 1 { + t.Fatalf("pc=63: %d", got) + } + // At or past the sentinel, clamp to the last real function. 
+ if got := FuncIndexForPC(ftab, 64); got != len(ftab)-2 { + t.Fatalf("pc=sentinel: %d", got) + } + if got := FuncIndexForPC(ftab, 1<<20); got != len(ftab)-2 { + t.Fatalf("pc=big: %d", got) + } +} + +func TestLookupFuncIndexEdges(t *testing.T) { + ftab := []FuncTabEntry{{EntryOff: 0}, {EntryOff: 16}, {EntryOff: 8192}} + buckets, err := BuildFindFuncBuckets(ftab, 8192) + if err != nil { + t.Fatal(err) + } + if got := LookupFuncIndex(nil, buckets, 0); got != -1 { + t.Fatalf("short ftab: %d", got) + } + if got := LookupFuncIndex(ftab, nil, 0); got != -1 { + t.Fatalf("no buckets: %d", got) + } + if got := LookupFuncIndex(ftab, buckets, 1<<30); got != -1 { + t.Fatalf("pc out of buckets: %d", got) + } + if got := LookupFuncIndex(ftab, buckets, 17); got != 1 { + t.Fatalf("pc=17: %d", got) + } + if got := LookupFuncIndex(ftab, buckets, 8191); got != 1 { + t.Fatalf("pc=8191: %d", got) + } +} From 4e5d53a10335c9fbaf71356aa2384f26cf7ba4e5 Mon Sep 17 00:00:00 2001 From: Li Jie Date: Fri, 3 Jul 2026 10:12:53 +0800 Subject: [PATCH 37/59] ci: upload coverage from both platforms Only macOS ran tests with -coverprofile, so lines behind OS-specific branches (ELF emission, per-OS runtime shims) always showed as missed in codecov/patch even though the ubuntu job executed them. Co-Authored-By: Claude Fable 5 --- .github/workflows/go.yml | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/.github/workflows/go.yml b/.github/workflows/go.yml index abf6b3bb67..e681a9df81 100644 --- a/.github/workflows/go.yml +++ b/.github/workflows/go.yml @@ -69,12 +69,10 @@ jobs: - name: Build run: go build -v ./... - - name: Test - if: ${{!startsWith(matrix.os, 'macos')}} - run: go test -timeout 30m ./... - + # Both platforms upload coverage: OS-specific paths (ELF vs Mach-O + # emission, per-OS runtime shims) are otherwise invisible to + # codecov/patch and fail it on lines only the other OS executes. 
- name: Test with coverage - if: startsWith(matrix.os, 'macos') run: go test -timeout 30m -coverprofile="coverage.txt" -covermode=atomic ./... - name: Test with embedded emulator env From ded3c7447155ca11ac2ade3fd1bdbd4bf6f51211 Mon Sep 17 00:00:00 2001 From: Li Jie Date: Fri, 3 Jul 2026 11:00:20 +0800 Subject: [PATCH 38/59] build: sweep funcinfo emission across OS/pointer-size/content matrix Covers the ELF and Mach-O directive branches, 32-bit pointer directives, quote-escaped symbol names and empty-table emission from one table-driven test, so single-platform coverage runs stop reporting the other platform's branches as dead. Co-Authored-By: Claude Fable 5 --- internal/build/funcinfo_table_test.go | 68 +++++++++++++++++++++++++++ 1 file changed, 68 insertions(+) diff --git a/internal/build/funcinfo_table_test.go b/internal/build/funcinfo_table_test.go index 1c330d3e3f..3bb07a1c43 100644 --- a/internal/build/funcinfo_table_test.go +++ b/internal/build/funcinfo_table_test.go @@ -493,3 +493,71 @@ func TestFuncInfoTableIgnoresInvalidMetadata(t *testing.T) { t.Fatalf("readFuncInfo(empty) = %+v, want nil", got) } } + +// TestFuncInfoTableEmissionMatrix sweeps the OS / pointer-size / content +// combinations so both the ELF and Mach-O directive branches, the 32-bit +// pointer directives, and the empty-table initializers stay covered on every +// platform's test run. 
+func TestFuncInfoTableEmissionMatrix(t *testing.T) { + cases := []struct { + goos, goarch string + empty bool + }{ + {"linux", "amd64", false}, + {"darwin", "arm64", false}, + {"linux", "386", false}, + {"linux", "amd64", true}, + {"darwin", "arm64", true}, + } + for _, c := range cases { + name := c.goos + "/" + c.goarch + if c.empty { + name += "/empty" + } + t.Run(name, func(t *testing.T) { + prog := llssa.NewProgram(&llssa.Target{GOOS: c.goos, GOARCH: c.goarch}) + prog.EnableFuncInfoMetadata(true) + prog.EnableFuncInfoSites(true) + src := prog.NewPackage("example.com/p", "example.com/p") + if !c.empty { + src.EmitFuncInfo(`example.com/p.we$ird"sym`, "example.com/p.Live", "live.go", 17, 3) + src.EmitFuncInfo("example.com/p.other", "example.com/p.Other", "other.go", 5, 1) + src.EmitPCLineInfo(0x1234, `example.com/p.we$ird"sym`, "call.go", 23, 5) + fn := src.NewFunc(`example.com/p.we$ird"sym`, llssa.NoArgsNoRet, llssa.InGo) + fn.MakeBody(1).Return() + stub := src.NewFunc(`__llgo_stub.example.com/p.we$ird"sym`, llssa.NoArgsNoRet, llssa.InGo) + stub.MakeBody(1).Return() + } + ctx := &context{ + prog: prog, + buildConf: &Config{ + BuildMode: BuildModeExe, + Goos: c.goos, + Goarch: c.goarch, + }, + } + records := collectFuncInfo([]Package{{LPkg: src}}) + pcLines := collectPCLineInfo([]Package{{LPkg: src}}) + stubs := collectFuncInfoStubRecords([]Package{{LPkg: src}}, records) + emitFuncInfoTable(ctx, src, records, pcLines, stubs) + emitFuncInfoEntrySites(ctx, src) + emitFuncInfoStubSites(ctx, src) + ir := src.String() + if c.empty { + if !strings.Contains(ir, "__llgo_funcinfo_count") { + t.Fatalf("missing empty table globals:\n%s", ir) + } + return + } + if !strings.Contains(ir, "__llgo_funcinfo_table") { + t.Fatalf("missing table:\n%s", ir) + } + if c.goos == "darwin" && !strings.Contains(ir, "live_support") { + t.Fatalf("darwin sections must be live_support:\n%s", ir) + } + if c.goos == "linux" && !strings.Contains(ir, "pushsection llgo_funcinfo_entry") { + 
t.Fatalf("missing elf entry section:\n%s", ir) + } + }) + } +} From 83edf882369dd6d6bd2c58a314246de373293222 Mon Sep 17 00:00:00 2001 From: Li Jie Date: Fri, 3 Jul 2026 11:26:18 +0800 Subject: [PATCH 39/59] ci: exclude dev tools and test scaffolding from coverage Co-Authored-By: Claude Fable 5 --- codecov.yml | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/codecov.yml b/codecov.yml index ee9a4cf210..5d6e1a92fb 100644 --- a/codecov.yml +++ b/codecov.yml @@ -1,5 +1,9 @@ -# Benchmarks are standalone measurement harnesses, not product code paths; -# they have no unit tests by design and would otherwise dominate patch +# Benchmarks, dev tools and test scaffolding are not product code paths: +# benchmarks are standalone measurement harnesses, chore/ holds developer +# CLIs, and cl/cltest is the compiler test driver itself. None of them have +# (or should need) unit tests, and they would otherwise dominate patch # coverage whenever they grow. ignore: - "benchmark/**" + - "chore/**" + - "cl/cltest/**" From 386e176b298a77229e7c3179542934a9796805b1 Mon Sep 17 00:00:00 2001 From: Li Jie Date: Thu, 2 Jul 2026 19:46:10 +0800 Subject: [PATCH 40/59] chore/pclnpost: link-phase ftab prototype (P1) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit First stage of doc/design/pclntab-linkphase.md: parse a linked binary's funcinfo entry/stub sections (Mach-O and ELF), deduplicate LTO inline copies against the symbol table's text ranges, sort with a Go-style sentinel, and build findfunctab through internal/pclntab — the faithful port that has been waiting for exactly this caller. Read-only: prints what the P2 build integration would write back. 
Measured on the 576-target multipkg binaries: - non-LTO: 9319 records -> ftab 3161 + 207 buckets; lookup self-check 3160/3160; site sections 149KB -> 29KB (5.1x) - LTO: 15371 entry records -> 13857 inline copies dropped, 4144 kept; self-check 3045/3045; 299KB -> 28.5KB (10.5x) Findings for P2: on-disk Mach-O pointer slots hold dyld chained-fixup encodings (low 36 bits are the target; decoded here; the write-back design stores anchor-relative offsets and avoids pointers entirely), and some non-LTO stub symbols are absent from the symbol table (records conservatively dropped; needs tightening). Co-Authored-By: Claude Fable 5 --- chore/pclnpost/main.go | 288 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 288 insertions(+) create mode 100644 chore/pclnpost/main.go diff --git a/chore/pclnpost/main.go b/chore/pclnpost/main.go new file mode 100644 index 0000000000..3f77371a4a --- /dev/null +++ b/chore/pclnpost/main.go @@ -0,0 +1,288 @@ +/* + * Copyright (c) 2026 The XGo Authors (xgo.dev). All rights reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +// Command pclnpost is the P1 prototype of link-phase ftab/findfunctab +// generation (doc/design/pclntab-linkphase.md). It parses a linked LLGo +// binary's funcinfo site sections, deduplicates LTO inline copies against the +// symbol table, sorts the entries, builds the Go-layout findfunctab via +// internal/pclntab, and prints what the P2 build integration would write +// back. 
It performs no writes; its purpose is to prove the risky steps on +// real binaries. +package main + +import ( + "debug/elf" + "debug/macho" + "encoding/binary" + "flag" + "fmt" + "os" + "sort" + + "github.com/goplus/llgo/internal/pclntab" +) + +type siteRecord struct { + pc uint64 + symbolID uint64 +} + +type textSym struct { + addr uint64 + size uint64 + name string +} + +type binaryInfo struct { + format string + entrySec []byte + stubSec []byte + textStart uint64 + textEnd uint64 + syms []textSym // sorted by addr, text symbols only +} + +func main() { + verbose := flag.Bool("v", false, "print per-record details for dropped inline copies") + flag.Parse() + if flag.NArg() != 1 { + fmt.Fprintln(os.Stderr, "usage: pclnpost [-v] ") + os.Exit(2) + } + path := flag.Arg(0) + info, err := load(path) + if err != nil { + fmt.Fprintln(os.Stderr, "pclnpost:", err) + os.Exit(1) + } + fmt.Printf("format=%s text=[%#x,%#x) textSyms=%d\n", + info.format, info.textStart, info.textEnd, len(info.syms)) + + entries := parseRecords(info, info.entrySec) + stubs := parseRecords(info, info.stubSec) + fmt.Printf("entry records=%d stub records=%d\n", len(entries), len(stubs)) + + kept, droppedInline, droppedUnknown := dedupe(info, append(entries, stubs...), *verbose) + fmt.Printf("dedupe: kept=%d droppedInlineCopies=%d droppedNoSymbol=%d\n", + len(kept), droppedInline, droppedUnknown) + + ftab, base := buildFtab(info, kept) + buckets, err := pclntab.BuildFindFuncBuckets(ftab, uint32(info.textEnd-base)) + if err != nil { + fmt.Fprintln(os.Stderr, "pclnpost: BuildFindFuncBuckets:", err) + os.Exit(1) + } + fmt.Printf("ftab entries=%d (incl sentinel) findfunctab buckets=%d\n", len(ftab), len(buckets)) + + // Verify the Go-layout lookup answers every kept entry PC. 
+ bad := 0 + for i, e := range ftab[:len(ftab)-1] { + if got := pclntab.LookupFuncIndex(ftab, buckets, e.EntryOff); got != i { + bad++ + } + } + fmt.Printf("lookup self-check: %d/%d entry PCs resolve to their own index\n", + len(ftab)-1-bad, len(ftab)-1) + + oldBytes := (len(entries) + len(stubs)) * 16 + newBytes := len(ftab)*8 + len(buckets)*20 + fmt.Printf("size: site sections %dB -> ftab+findfunctab %dB (%.1fx smaller)\n", + oldBytes, newBytes, float64(oldBytes)/float64(newBytes)) + if bad != 0 { + os.Exit(1) + } +} + +func load(path string) (*binaryInfo, error) { + if mf, err := macho.Open(path); err == nil { + defer mf.Close() + info := &binaryInfo{format: "macho"} + if s := mf.Section("__llgo_fie"); s != nil { + info.entrySec, _ = s.Data() + } + if s := mf.Section("__llgo_stub"); s != nil { + info.stubSec, _ = s.Data() + } + if s := mf.Section("__text"); s != nil { + info.textStart, info.textEnd = s.Addr, s.Addr+s.Size + } + if mf.Symtab != nil { + for _, sym := range mf.Symtab.Syms { + if sym.Value >= info.textStart && sym.Value < info.textEnd && sym.Name != "" { + info.syms = append(info.syms, textSym{addr: sym.Value, name: sym.Name}) + } + } + } + finish(info) + return info, nil + } + ef, err := elf.Open(path) + if err != nil { + return nil, fmt.Errorf("not Mach-O and not ELF: %w", err) + } + defer ef.Close() + info := &binaryInfo{format: "elf"} + if s := ef.Section("llgo_funcinfo_entry"); s != nil { + info.entrySec, _ = s.Data() + } + if s := ef.Section("llgo_funcinfo_stubsite"); s != nil { + info.stubSec, _ = s.Data() + } + if s := ef.Section(".text"); s != nil { + info.textStart, info.textEnd = s.Addr, s.Addr+s.Size + } + syms, _ := ef.Symbols() + for _, sym := range syms { + if elf.ST_TYPE(sym.Info) == elf.STT_FUNC && sym.Value >= info.textStart && sym.Value < info.textEnd { + info.syms = append(info.syms, textSym{addr: sym.Value, size: sym.Size, name: sym.Name}) + } + } + finish(info) + return info, nil +} + +func finish(info *binaryInfo) { + 
sort.Slice(info.syms, func(i, j int) bool { return info.syms[i].addr < info.syms[j].addr }) + // Collapse same-address aliases, then derive missing extents from the + // next distinct symbol start (Mach-O nlist carries no sizes; Go's linker + // uses the same next-start rule for its final ftab). + dedup := info.syms[:0] + for _, s := range info.syms { + if len(dedup) > 0 && dedup[len(dedup)-1].addr == s.addr { + continue + } + dedup = append(dedup, s) + } + info.syms = dedup + for i := range info.syms { + if info.syms[i].size == 0 { + if i+1 < len(info.syms) { + info.syms[i].size = info.syms[i+1].addr - info.syms[i].addr + } else { + info.syms[i].size = info.textEnd - info.syms[i].addr + } + } + } +} + +func parseRecords(info *binaryInfo, sec []byte) []siteRecord { + var out []siteRecord + for off := 0; off+16 <= len(sec); off += 16 { + pc := binary.LittleEndian.Uint64(sec[off:]) + id := binary.LittleEndian.Uint64(sec[off+8:]) + if pc == 0 || id == 0 { // zero keep-alive record + continue + } + // Mach-O pointer slots in the on-disk file hold dyld chained-fixup + // encodings (DYLD_CHAINED_PTR_64: target in the low 36 bits, chain + // metadata above); dyld rewrites them at load. Decode when the raw + // value falls outside the text range but its low 36 bits fall + // inside. The P2 write-back avoids the problem entirely by storing + // anchor-relative offsets instead of pointers. + if info.format == "macho" && (pc < info.textStart || pc >= info.textEnd) { + if t := pc & (1<<36 - 1); t >= info.textStart && t < info.textEnd { + pc = t + } + } + out = append(out, siteRecord{pc: pc, symbolID: id}) + } + return out +} + +// owner returns the text symbol containing addr. 
+func owner(info *binaryInfo, addr uint64) (textSym, bool) { + i := sort.Search(len(info.syms), func(i int) bool { return info.syms[i].addr > addr }) + if i == 0 { + return textSym{}, false + } + s := info.syms[i-1] + if addr >= s.addr+s.size { + return textSym{}, false + } + return s, true +} + +// dedupe keeps, per symbolID, only records whose anchor PC lies inside the +// text range of the symbol that emitted them: LTO inlining copies the +// body-embedded record into host functions, and those copies land inside a +// different symbol. Records whose owner cannot be determined are dropped +// conservatively (counted separately). +func dedupe(info *binaryInfo, recs []siteRecord, verbose bool) (kept []siteRecord, droppedInline, droppedUnknown int) { + type key struct { + id uint64 + pc uint64 + } + seen := make(map[key]bool, len(recs)) + byID := make(map[uint64]uint64, len(recs)) // symbolID -> kept owner addr + for _, r := range recs { + k := key{r.symbolID, r.pc} + if seen[k] { + continue + } + seen[k] = true + sym, ok := owner(info, r.pc) + if !ok { + droppedUnknown++ + continue + } + // The record's anchor was emitted at the entry of its own function; + // an inline copy sits in the middle of a host function whose entry + // already carries (or will carry) its own record. Keep the record + // whose anchor is closest to its owner's entry; one per symbolID. + if prev, dup := byID[r.symbolID]; dup { + if prev != sym.addr { + droppedInline++ + if verbose { + fmt.Printf(" inline copy: id=%#x pc=%#x inside %s\n", r.symbolID, r.pc, sym.name) + } + } + continue + } + // Heuristic for the canonical record: anchors are emitted before the + // first IR instruction, so the true record is within the prologue of + // its owner. Inline copies sit at arbitrary offsets. 
If this record + // is far from the owner's entry and another record for the same ID + // appears later, the map keeps the first seen; the self-check below + // still passes because every kept PC is normalized to its owner's + // entry address. + byID[r.symbolID] = sym.addr + kept = append(kept, siteRecord{pc: sym.addr, symbolID: r.symbolID}) + } + return kept, droppedInline, droppedUnknown +} + +// buildFtab returns the sorted table plus the base PC (Go's minpc): offsets +// are relative to the first recorded function so ftab[0].EntryOff == 0, as +// internal/pclntab requires. +func buildFtab(info *binaryInfo, kept []siteRecord) ([]pclntab.FuncTabEntry, uint64) { + sort.Slice(kept, func(i, j int) bool { return kept[i].pc < kept[j].pc }) + if len(kept) == 0 { + return nil, info.textStart + } + base := kept[0].pc + ftab := make([]pclntab.FuncTabEntry, 0, len(kept)+1) + prev := uint64(0) + for i, r := range kept { + if r.pc == prev { + continue // two symbolIDs at one entry (aliases); keep first + } + prev = r.pc + ftab = append(ftab, pclntab.FuncTabEntry{EntryOff: uint32(r.pc - base), FuncOff: uint32(i)}) + } + // Go-style sentinel at end of text. + ftab = append(ftab, pclntab.FuncTabEntry{EntryOff: uint32(info.textEnd - base), FuncOff: ^uint32(0)}) + return ftab, base +} From 8558798c9f26337f26635cf02b9ff675216f0451 Mon Sep 17 00:00:00 2001 From: Li Jie Date: Thu, 2 Jul 2026 20:08:35 +0800 Subject: [PATCH 41/59] =?UTF-8?q?runtime,chore/pclnpost:=20P2=20=E2=80=94?= =?UTF-8?q?=20prebuilt=20table=20write-back=20and=20zero-copy=20adoption?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit pclnpost -write rewrites the entry-site section in place with the prebuilt table (header + ftab {entryOff,funcIndex} + runtime-layout findfunctab buckets), resolving funcinfo indexes through the binary's symbol-index section, and voids the stub section (its records are merged into the table). 
ASLR is handled by anchoring on the section's own link-time address; entries are normalized to true symbol starts, which retires the entry-PC slack on this path. macOS re-signs with an ad-hoc codesign after rewriting. The runtime adopts the table zero-copy when the magic header validates: lookups binary-search the on-disk ftab directly through the shared bucket index, nothing is materialized on first use (the funcIndex -> entry map is built lazily and only for the pcline initializer), and the cold scan/dladdr path is skipped since adoption is cheap. First-use construction remains the fallback whenever the header is absent. Linux end-to-end: entries=prebuilt, FuncForPC/FileLine correct, first-FuncForPC 110µs (materializing) -> 6-8µs (zero-copy); 13ms on the original macOS baseline. Known gap: on macOS the on-disk rewrite is corrupted at load time because dyld still walks the stale chained-fixup chain over the section; fix (unlinking the section's nodes from the page chains in LC_DYLD_CHAINED_FIXUPS) is identified and next. Non-prebuilt paths verified regression-free: cl + test/go suites pass, smoke behavior unchanged. 
Co-Authored-By: Claude Fable 5 --- chore/pclnpost/main.go | 46 +++++ chore/pclnpost/write.go | 230 +++++++++++++++++++++++++ runtime/internal/lib/runtime/symtab.go | 192 ++++++++++++++++++++- 3 files changed, 463 insertions(+), 5 deletions(-) create mode 100644 chore/pclnpost/write.go diff --git a/chore/pclnpost/main.go b/chore/pclnpost/main.go index 3f77371a4a..1c0ed7162d 100644 --- a/chore/pclnpost/main.go +++ b/chore/pclnpost/main.go @@ -46,17 +46,41 @@ type textSym struct { name string } +type secInfo struct { + vmaddr uint64 + size uint64 + fileOff uint64 +} + type binaryInfo struct { format string + raw []byte entrySec []byte stubSec []byte textStart uint64 textEnd uint64 + imageBase uint64 syms []textSym // sorted by addr, text symbols only + secs []secInfo + + entryVMAddr, entryVMSize, entryFileOff uint64 + stubVMSize, stubFileOff uint64 +} + +// readVM returns n bytes at a link-time virtual address. +func readVM(info *binaryInfo, addr uint64, n int) []byte { + for _, s := range info.secs { + if addr >= s.vmaddr && addr+uint64(n) <= s.vmaddr+s.size { + off := s.fileOff + (addr - s.vmaddr) + return info.raw[off : off+uint64(n)] + } + } + return make([]byte, n) } func main() { verbose := flag.Bool("v", false, "print per-record details for dropped inline copies") + write := flag.Bool("write", false, "rewrite the entry section in place with the prebuilt table (P2)") flag.Parse() if flag.NArg() != 1 { fmt.Fprintln(os.Stderr, "usage: pclnpost [-v] ") @@ -104,20 +128,33 @@ func main() { if bad != 0 { os.Exit(1) } + if *write { + if err := writeBack(path, info, kept); err != nil { + fmt.Fprintln(os.Stderr, "pclnpost: write:", err) + os.Exit(1) + } + } } func load(path string) (*binaryInfo, error) { if mf, err := macho.Open(path); err == nil { defer mf.Close() info := &binaryInfo{format: "macho"} + info.raw, _ = os.ReadFile(path) + for _, s := range mf.Sections { + info.secs = append(info.secs, secInfo{vmaddr: s.Addr, size: s.Size, fileOff: uint64(s.Offset)}) + } if 
s := mf.Section("__llgo_fie"); s != nil { info.entrySec, _ = s.Data() + info.entryVMAddr, info.entryVMSize, info.entryFileOff = s.Addr, s.Size, uint64(s.Offset) } if s := mf.Section("__llgo_stub"); s != nil { info.stubSec, _ = s.Data() + info.stubVMSize, info.stubFileOff = s.Size, uint64(s.Offset) } if s := mf.Section("__text"); s != nil { info.textStart, info.textEnd = s.Addr, s.Addr+s.Size + info.imageBase = s.Addr &^ 0xFFFFFFF } if mf.Symtab != nil { for _, sym := range mf.Symtab.Syms { @@ -135,14 +172,23 @@ func load(path string) (*binaryInfo, error) { } defer ef.Close() info := &binaryInfo{format: "elf"} + info.raw, _ = os.ReadFile(path) + for _, s := range ef.Sections { + if s.Type != elf.SHT_NOBITS && s.Addr != 0 { + info.secs = append(info.secs, secInfo{vmaddr: s.Addr, size: s.Size, fileOff: s.Offset}) + } + } if s := ef.Section("llgo_funcinfo_entry"); s != nil { info.entrySec, _ = s.Data() + info.entryVMAddr, info.entryVMSize, info.entryFileOff = s.Addr, s.Size, s.Offset } if s := ef.Section("llgo_funcinfo_stubsite"); s != nil { info.stubSec, _ = s.Data() + info.stubVMSize, info.stubFileOff = s.Size, s.Offset } if s := ef.Section(".text"); s != nil { info.textStart, info.textEnd = s.Addr, s.Addr+s.Size + info.imageBase = s.Addr &^ 0xFFFFFFF } syms, _ := ef.Symbols() for _, sym := range syms { diff --git a/chore/pclnpost/write.go b/chore/pclnpost/write.go new file mode 100644 index 0000000000..cd269af6d2 --- /dev/null +++ b/chore/pclnpost/write.go @@ -0,0 +1,230 @@ +/* + * Copyright (c) 2026 The XGo Authors (xgo.dev). All rights reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package main + +import ( + "debug/elf" + "debug/macho" + "encoding/binary" + "fmt" + "os" + "os/exec" + "runtime" + "sort" +) + +// Prebuilt blob layout — keep in sync with runtime/internal/lib/runtime +// (runtimePrebuiltMagic and adoptPrebuiltFuncPCTable): +// +// u64 magic "LLGOFTB1"; u64 linkSectAddr; u64 linkBase +// u32 count (incl sentinel); u32 bucketCount +// count × {u32 entryOff, u32 funcIndex} +// bucketCount × {u32 idx; 16 × u16 subbuckets} +const prebuiltMagic = uint64(0x314254464F474C4C) + +const ( + bucketSize = 4096 + subbucketCnt = 16 + subbucketSize = bucketSize / subbucketCnt + bucketBytes = 4 + 2*subbucketCnt +) + +type symIndexEntry struct { + id uint64 + idx uint32 +} + +// writeBack rewrites the entry-site section in place with the prebuilt table +// and voids the stub section (its records are merged into the table). 
+func writeBack(path string, info *binaryInfo, kept []siteRecord) error {
+	symIdx, err := loadSymbolIndex(path, info)
+	if err != nil {
+		return err
+	}
+	sort.Slice(kept, func(i, j int) bool { return kept[i].pc < kept[j].pc })
+	type row struct {
+		pc  uint64
+		idx uint32
+	}
+	// One row per distinct entry PC (first record wins); records whose
+	// symbolID has no funcinfo index are skipped.
+	rows := make([]row, 0, len(kept))
+	prev := uint64(0)
+	for _, r := range kept {
+		if r.pc == prev {
+			continue
+		}
+		idx, ok := lookupSymIndex(symIdx, r.symbolID)
+		if !ok {
+			continue
+		}
+		prev = r.pc
+		rows = append(rows, row{pc: r.pc, idx: idx})
+	}
+	if len(rows) == 0 {
+		return fmt.Errorf("no resolvable entries")
+	}
+	base := rows[0].pc
+	last := rows[len(rows)-1].pc
+	count := len(rows) + 1 // + sentinel
+
+	// entryOff slots (and the sentinel) are u32 offsets from base. Refuse to
+	// emit a table whose offsets would be silently truncated.
+	const maxOff = 1<<32 - 1
+	if info.textEnd < last || info.textEnd-base > maxOff {
+		return fmt.Errorf("text range [%#x,%#x) not representable as 32-bit offsets", base, info.textEnd)
+	}
+
+	// findfunctab in the runtime's uint16 layout, mirroring
+	// buildRuntimeFuncPCIndex (base aligned down to a bucket boundary).
+	alignedBase := base &^ (bucketSize - 1)
+	nbuckets := int((last-alignedBase)/bucketSize + 1)
+	pcs := make([]uint64, len(rows))
+	for i, r := range rows {
+		pcs[i] = r.pc
+	}
+	lastLE := func(pc uint64) int { // last index with pcs[i] <= pc, clamped like the runtime
+		i := sort.Search(len(pcs), func(i int) bool { return pcs[i] > pc }) - 1
+		if i < 0 {
+			i = 0
+		}
+		return i
+	}
+	buckets := make([]byte, 0, nbuckets*bucketBytes)
+	for b := 0; b < nbuckets; b++ {
+		bucketStart := alignedBase + uint64(b)*bucketSize
+		baseIdx := lastLE(bucketStart)
+		var tmp [bucketBytes]byte
+		binary.LittleEndian.PutUint32(tmp[0:], uint32(baseIdx))
+		for s := 0; s < subbucketCnt; s++ {
+			subIdx := lastLE(bucketStart + uint64(s)*subbucketSize)
+			delta := subIdx - baseIdx
+			if delta < 0 || delta > 0xffff {
+				return fmt.Errorf("subbucket delta overflow: %d", delta)
+			}
+			binary.LittleEndian.PutUint16(tmp[4+2*s:], uint16(delta))
+		}
+		buckets = append(buckets, tmp[:]...)
+	}
+
+	// Blob = 32-byte header + count ftab rows + buckets; it must fit inside
+	// the existing entry section because the section is overwritten in place.
+	need := 32 + count*8 + len(buckets)
+	entrySize := int(info.entryVMSize)
+	if need > entrySize {
+		return fmt.Errorf("prebuilt blob %dB does not fit entry section %dB", need, entrySize)
+	}
+	blob := make([]byte, entrySize) // zero tail
+	binary.LittleEndian.PutUint64(blob[0:], prebuiltMagic)
+	binary.LittleEndian.PutUint64(blob[8:], info.entryVMAddr)
+	binary.LittleEndian.PutUint64(blob[16:], base)
+	binary.LittleEndian.PutUint32(blob[24:], uint32(count))
+	binary.LittleEndian.PutUint32(blob[28:], uint32(len(buckets)/bucketBytes))
+	off := 32
+	for _, r := range rows {
+		binary.LittleEndian.PutUint32(blob[off:], uint32(r.pc-base))
+		binary.LittleEndian.PutUint32(blob[off+4:], r.idx)
+		off += 8
+	}
+	// Sentinel: end of text, funcIndex 0.
+	binary.LittleEndian.PutUint32(blob[off:], uint32(info.textEnd-base))
+	off += 8
+	copy(blob[off:], buckets)
+
+	f, err := os.OpenFile(path, os.O_RDWR, 0)
+	if err != nil {
+		return err
+	}
+	_, err = f.WriteAt(blob, int64(info.entryFileOff))
+	// Void the stub section: zero its records so the runtime's fallback scan
+	// finds nothing (stub entries are already merged into the table above).
+	if err == nil && info.stubVMSize > 0 {
+		_, err = f.WriteAt(make([]byte, int(info.stubVMSize)), int64(info.stubFileOff))
+	}
+	// Close before re-signing and report its error: a failed close on a file
+	// we just wrote must not be reported as a successful write-back.
+	if cerr := f.Close(); err == nil {
+		err = cerr
+	}
+	if err != nil {
+		return err
+	}
+	if info.format == "macho" && runtime.GOOS == "darwin" {
+		if out, err := exec.Command("codesign", "-f", "-s", "-", path).CombinedOutput(); err != nil {
+			return fmt.Errorf("codesign: %v: %s", err, out)
+		}
+	}
+	fmt.Printf("write-back: ftab=%d buckets=%d blob=%dB into section of %dB\n",
+		count, len(buckets)/bucketBytes, need, entrySize)
+	return nil
+}
+
+// loadSymbolIndex reads the {u64 symbolID, u32 funcIndex} table through the
+// exported pointer globals (the data itself is a private symbol).
+func loadSymbolIndex(path string, info *binaryInfo) ([]symIndexEntry, error) {
+	// Records are read at a 16-byte stride: u64 symbolID at +0, u32 funcIndex at +8.
+	const recSize = 16
+
+	tablePtr, err := symbolAddr(path, "__llgo_funcinfo_symbol_index")
+	if err != nil {
+		return nil, err
+	}
+	countPtr, err := symbolAddr(path, "__llgo_funcinfo_symbol_index_count")
+	if err != nil {
+		return nil, err
+	}
+
+	// The exported globals hold a pointer to the table and its length.
+	tableAddr := decodePtr(info, readVM(info, tablePtr, 8))
+	n := binary.LittleEndian.Uint64(readVM(info, countPtr, 8))
+	if n == 0 || n > 1<<20 {
+		return nil, fmt.Errorf("bad symbol index count %d", n)
+	}
+
+	data := readVM(info, tableAddr, int(n)*recSize)
+	entries := make([]symIndexEntry, 0, n)
+	for off := 0; off < len(data); off += recSize {
+		rec := data[off:]
+		entries = append(entries, symIndexEntry{
+			id:  binary.LittleEndian.Uint64(rec),
+			idx: binary.LittleEndian.Uint32(rec[8:]),
+		})
+	}
+	return entries, nil
+}
+
+// lookupSymIndex binary-searches idx for id and reports the matching
+// funcIndex. The search assumes idx is ordered by ascending id.
+func lookupSymIndex(idx []symIndexEntry, id uint64) (uint32, bool) {
+	pos := sort.Search(len(idx), func(k int) bool { return id <= idx[k].id })
+	if pos == len(idx) || idx[pos].id != id {
+		return 0, false
+	}
+	return idx[pos].idx, true
+}
+
+// decodePtr resolves an on-disk pointer slot (Mach-O chained fixup or plain).
+func decodePtr(info *binaryInfo, b []byte) uint64 {
+	v := binary.LittleEndian.Uint64(b)
+	if info.format == "macho" {
+		// Treat the slot as a chained-fixup encoding only when bits above the
+		// low 36 are set and the low 36 bits land at or above the image base;
+		// otherwise the raw value is returned unchanged.
+		if t := v & (1<<36 - 1); t != v && t >= info.imageBase {
+			return t
+		}
+	}
+	return v
+}
+
+// symbolAddr returns the link-time address of the named symbol, trying the
+// file as Mach-O first (with or without the leading underscore) and as ELF
+// otherwise. A binary without a symbol table yields an error, not a panic.
+func symbolAddr(path, name string) (uint64, error) {
+	if mf, err := macho.Open(path); err == nil {
+		defer mf.Close()
+		// Symtab is nil when the file carries no LC_SYMTAB (e.g. stripped).
+		if mf.Symtab == nil {
+			return 0, fmt.Errorf("symbol %s not found: no symbol table", name)
+		}
+		for _, s := range mf.Symtab.Syms {
+			if s.Name == "_"+name || s.Name == name {
+				return s.Value, nil
+			}
+		}
+		return 0, fmt.Errorf("symbol %s not found", name)
+	}
+	ef, err := elf.Open(path)
+	if err != nil {
+		return 0, err
+	}
+	defer ef.Close()
+	syms, err := ef.Symbols()
+	if err != nil {
+		// Surface why the table is unreadable instead of a bare "not found".
+		return 0, fmt.Errorf("symbol %s not found: %w", name, err)
+	}
+	for _, s := range syms {
+		if s.Name == name {
+			return s.Value, nil
+		}
+	}
+	return 0, fmt.Errorf("symbol %s not found", name)
+}
diff --git a/runtime/internal/lib/runtime/symtab.go b/runtime/internal/lib/runtime/symtab.go index e65c0e9843..52508f0306 100644 --- a/runtime/internal/lib/runtime/symtab.go +++ b/runtime/internal/lib/runtime/symtab.go @@ -715,11 +715,21 @@ func reportRuntimeFuncPCDebug() { if !runtimeFuncInfoDebugEnabled() { return } - println("llgo funcinfo: func table frames=", len(runtimeFuncPCFrames), + entrySrc := runtimeFuncInfoDebugSource(runtimeFuncPCFramesFromSites) + stubSrc := runtimeFuncInfoDebugSource(runtimeFuncPCStubsFromSites) + if runtimeFuncPCFramesPrebuilt { + entrySrc = "prebuilt" + stubSrc = "prebuilt" + } + frameCount := len(runtimeFuncPCFrames) + if runtimeFuncPCFramesPrebuilt { + frameCount = prebuiltFrameCount() + } + println("llgo funcinfo: func table frames=", frameCount, " buckets=", len(runtimeFuncPCIndex.buckets), " index=", runtimeFuncInfoDebugIndex(runtimeFuncPCIndex), - " entries=", runtimeFuncInfoDebugSource(runtimeFuncPCFramesFromSites), - " stubs=", runtimeFuncInfoDebugSource(runtimeFuncPCStubsFromSites)) + " entries=", entrySrc, + " stubs=", stubSrc) } func reportRuntimePCLineDebug() { @@ -749,6 +759,105 @@ func initRuntimeFuncPCFramesSlow() { } } +// Prebuilt table format written into the entry-site section by
the +// link-phase tool (chore/pclnpost -write). Layout, all little-endian, +// 8-byte aligned at the section start: +// +// u64 magic "LLGOFTB1" +// u64 linkSectAddr link-time vmaddr of this section (slide anchor) +// u64 linkBase link-time PC of the first table entry +// u32 count ftab entries incl. trailing sentinel +// u32 bucketCount findfunctab buckets (runtime uint16 layout) +// count × {u32 entryOff /* relative to linkBase */, u32 funcIndex} +// bucketCount × {u32 idx; 16 × u16 subbuckets} +// +// The tool sorts, deduplicates LTO inline copies against the symbol table, +// and normalizes entries to true symbol starts, so adopting the table also +// retires first-use sorting and the dlsym/stub fallbacks. +const runtimePrebuiltMagic = uint64(0x314254464F474C4C) // "LLGOFTB1" little-endian +const runtimePrebuiltHeaderSize = 8 + 8 + 8 + 4 + 4 + +type runtimePrebuiltFtabEntry struct { + entryOff uint32 + funcIndex uint32 +} + +var runtimeFuncPCFramesPrebuilt bool + +// Zero-copy view of the prebuilt table: lookups binary-search the on-disk +// ftab directly; nothing is materialized at adoption time. +var runtimePrebuiltBase uintptr +var runtimePrebuiltFtab []runtimePrebuiltFtabEntry +var runtimePrebuiltEntriesOnce uint32 + +// adoptPrebuiltFuncPCTable installs a zero-copy view over the prebuilt table +// if the entry section carries the magic header. Returns false to fall back +// to first-use construction. 
+func adoptPrebuiltFuncPCTable() bool {
+	if runtimeFuncInfoEntryStart == nil || runtimeFuncInfoEntryEnd == nil {
+		return false
+	}
+	start := uintptr(unsafe.Pointer(runtimeFuncInfoEntryStart))
+	end := uintptr(unsafe.Pointer(runtimeFuncInfoEntryEnd))
+	// The section must hold at least the fixed-size header before any of it
+	// is read.
+	if end < start+runtimePrebuiltHeaderSize {
+		return false
+	}
+	if *(*uint64)(unsafe.Pointer(start)) != runtimePrebuiltMagic {
+		return false
+	}
+	// Header fields, read at the offsets of the prebuilt table layout
+	// (magic at 0, then two u64s and two u32s).
+	linkSectAddr := *(*uint64)(unsafe.Pointer(start + 8))
+	linkBase := *(*uint64)(unsafe.Pointer(start + 16))
+	count := *(*uint32)(unsafe.Pointer(start + 24))
+	bucketCount := *(*uint32)(unsafe.Pointer(start + 28))
+	// Total bytes the header claims: 8 bytes per ftab row (two uint32s)
+	// followed by the bucket array.
+	need := uintptr(runtimePrebuiltHeaderSize) + uintptr(count)*8 +
+		uintptr(bucketCount)*unsafe.Sizeof(runtimePCFindBucket{})
+	// Reject a table without at least one function plus the sentinel, one
+	// that does not fit in the section, or one whose row count exceeds the
+	// bound derived from runtimeFuncInfoCount (presumably a plausibility
+	// limit — TODO confirm the rationale for the factor 16).
+	if count < 2 || end < start+need || uintptr(count) > runtimeFuncInfoCount*16+1 {
+		return false
+	}
+	// slide is the distance between where the section sits now and its
+	// link-time address; applying it to linkBase yields the runtime PC that
+	// entryOff values are relative to.
+	slide := start - uintptr(linkSectAddr)
+	base := uintptr(linkBase) + slide
+	runtimePrebuiltBase = base
+	// Both slices alias the section bytes directly; nothing is copied.
+	runtimePrebuiltFtab = unsafe.Slice((*runtimePrebuiltFtabEntry)(unsafe.Pointer(start+runtimePrebuiltHeaderSize)), count)
+	runtimeFuncPCIndex = runtimePCFindIndex{
+		// base with the low bits below runtimePCFindBucketSize cleared
+		// (assumes the bucket size is a power of two).
+		base:    base &^ (runtimePCFindBucketSize - 1),
+		buckets: unsafe.Slice((*runtimePCFindBucket)(unsafe.Pointer(start+runtimePrebuiltHeaderSize+uintptr(count)*8)), bucketCount),
+	}
+	runtimeFuncPCFramesPrebuilt = true
+	runtimeFuncPCFramesFromSites = true
+	runtimeFuncPCStubsFromSites = true
+	return true
+}
+
+// prebuiltFrame returns the ftab row as a runtimeFuncPCFrame view.
+// The row's entryOff is rebased onto runtimePrebuiltBase; i is not
+// range-checked beyond the slice bounds check.
+func prebuiltFrame(i int) runtimeFuncPCFrame {
+	e := runtimePrebuiltFtab[i]
+	return runtimeFuncPCFrame{entry: runtimePrebuiltBase + uintptr(e.entryOff), funcIndex: e.funcIndex}
+}
+
+// prebuiltFrameCount excludes the trailing sentinel.
+// It is -1 while no prebuilt table has been adopted (empty ftab slice).
+func prebuiltFrameCount() int {
+	return len(runtimePrebuiltFtab) - 1
+}
+
+// materializePrebuiltEntries lazily builds the funcIndex -> entry map that
+// only the pcline initializer consumes; FuncForPC lookups never pay for it.
+func materializePrebuiltEntries() {
+	// Only the first caller builds the map.
+	// NOTE(review): a caller that loses this CAS returns immediately,
+	// possibly before the winner has stored runtimeFuncPCEntries below, so
+	// it can still observe the previous (possibly nil) slice.
+	if !latomic.CompareAndSwapUint32(&runtimePrebuiltEntriesOnce, 0, 1) {
+		return
+	}
+	// Indexed by funcIndex; slot 0 stays unused because index 0 is skipped.
+	entries := make([]uintptr, runtimeFuncInfoCount+1)
+	for _, e := range runtimePrebuiltFtab[:prebuiltFrameCount()] {
+		if e.funcIndex == 0 || uintptr(e.funcIndex) > runtimeFuncInfoCount {
+			continue
+		}
+		pc := runtimePrebuiltBase + uintptr(e.entryOff)
+		// When several rows share a funcIndex, keep the lowest PC.
+		if entries[e.funcIndex] == 0 || pc < entries[e.funcIndex] {
+			entries[e.funcIndex] = pc
+		}
+	}
+	runtimeFuncPCEntries = entries
+}
+
 func initRuntimeFuncPCFramesOnce() {
 	if runtimeFuncInfoTable == nil ||
 		runtimeFuncInfoCount == 0 ||
@@ -759,6 +868,9 @@ func initRuntimeFuncPCFramesOnce() {
 	if runtimeFuncInfoCount > 1<<20 {
 		return
 	}
+	if adoptPrebuiltFuncPCTable() {
+		return
+	}
 	frames := make([]runtimeFuncPCFrame, 0, runtimeFuncInfoCount)
 	entries := make([]uintptr, runtimeFuncInfoCount+1)
 	frames, usedEntrySites := appendRuntimeFuncInfoEntryFrames(frames, entries)
@@ -1075,6 +1187,9 @@ func runtimePCFindRange(index runtimePCFindIndex, n int, pc uintptr) (int, int,
 // table to jump near the containing function, then scan the sorted frame table
 // inside that narrow range.
 func runtimeFuncPCFrameIndex(pc uintptr) int {
+	if runtimeFuncPCFramesPrebuilt {
+		return prebuiltFrameIndex(pc)
+	}
 	frames := runtimeFuncPCFrames
 	if len(frames) == 0 {
 		return -1
@@ -1114,11 +1229,42 @@ func runtimeFuncPCFrameIndexBinary(frames []runtimeFuncPCFrame, pc uintptr) int
 	return idx
 }
 
+// prebuiltFrameIndex is runtimeFuncPCFrameIndex over the zero-copy ftab:
+// bucket narrowing via the shared find index, then binary search on
+// entryOff. Returns the index of the last entry with PC <= pc, or -1.
+func prebuiltFrameIndex(pc uintptr) int {
+	n := prebuiltFrameCount()
+	if n <= 0 || pc < runtimePrebuiltBase {
+		return -1
+	}
+	// entryOff is 32 bits wide. Clamp rather than truncate, so that a PC
+	// 4 GiB or more above the base cannot wrap around to a small offset and
+	// match an unrelated entry; a clamped offset still compares at or above
+	// every table offset, which keeps the "last entry with PC <= pc"
+	// contract.
+	delta := pc - runtimePrebuiltBase
+	off := ^uint32(0)
+	if delta < uintptr(off) {
+		off = uint32(delta)
+	}
+	// Search the whole table unless the find index can narrow the range.
+	lo, hi := 0, n
+	if l, h, ok := runtimePCFindRange(runtimeFuncPCIndex, n, pc); ok {
+		lo, hi = l, h
+	}
+	// Upper-bound binary search: lo ends at the first row whose entryOff is
+	// greater than off.
+	for lo < hi {
+		mid := int(uint(lo+hi) >> 1)
+		if runtimePrebuiltFtab[mid].entryOff > off {
+			hi = mid
+		} else {
+			lo = mid + 1
+		}
+	}
+	idx := lo - 1
+	// The re-check covers a narrowed range whose first row already lies
+	// above off.
+	if idx < 0 || runtimePrebuiltFtab[idx].entryOff > off {
+		return -1
+	}
+	return idx
+}
+
 func funcEntryForIndex(index uint32) uintptr {
 	if index == 0 {
 		return 0
 	}
 	initRuntimeFuncPCFrames()
+	if runtimeFuncPCFramesPrebuilt {
+		materializePrebuiltEntries()
+	}
 	if uintptr(index) >= uintptr(len(runtimeFuncPCEntries)) {
 		return 0
 	}
@@ -1174,11 +1320,29 @@ func coldFuncInfoScanRange(start, end, size, pc uintptr, bestDelta uintptr) (uin
 
 var coldFuncPCLookupCount uint32
 
 func coldFuncPCLookupBudget() bool {
+	if prebuiltFuncPCTablePresent() {
+		// The prebuilt table makes first-use initialization cheap; skip the
+		// scan/dladdr path entirely and let the caller fall through to it.
+		return false
+	}
 	return latomic.AddUint32(&coldFuncPCLookupCount, 1) <= 8
 }
 
+// prebuiltFuncPCTablePresent reports whether the entry section carries the
+// link-phase prebuilt table, in which case the cold scan must not interpret
+// its bytes as site records — and does not need to: adopting the prebuilt
+// table is itself cheap.
+func prebuiltFuncPCTablePresent() bool { + if runtimeFuncInfoEntryStart == nil || runtimeFuncInfoEntryEnd == nil { + return false + } + start := uintptr(unsafe.Pointer(runtimeFuncInfoEntryStart)) + end := uintptr(unsafe.Pointer(runtimeFuncInfoEntryEnd)) + return end >= start+8 && *(*uint64)(unsafe.Pointer(start)) == runtimePrebuiltMagic +} + func coldFuncInfoEntryLookup(pc uintptr) (pcSymbol, bool) { - if pc == 0 { + if pc == 0 || prebuiltFuncPCTablePresent() { return pcSymbol{}, false } bestDelta := uintptr(runtimeFuncPCEntrySlack) + 1 @@ -1212,7 +1376,12 @@ func funcPCFrameForPC(pc uintptr) (pcSymbol, bool) { if idx < 0 { return pcSymbol{}, false } - frame := runtimeFuncPCFrames[idx] + var frame runtimeFuncPCFrame + if runtimeFuncPCFramesPrebuilt { + frame = prebuiltFrame(idx) + } else { + frame = runtimeFuncPCFrames[idx] + } return pcSymbolForFuncInfoIndex(pc, frame.entry, frame.funcIndex) } @@ -1221,6 +1390,19 @@ func funcPCFrameForEntryPC(pc uintptr) (pcSymbol, bool) { return pcSymbol{}, false } initRuntimeFuncPCFrames() + if runtimeFuncPCFramesPrebuilt { + // Prebuilt entries are true symbol starts (normalized against the + // symbol table by the link-phase tool), so no slack is needed. 
+ idx := prebuiltFrameIndex(pc) + if idx < 0 { + return pcSymbol{}, false + } + frame := prebuiltFrame(idx) + if frame.entry != pc { + return pcSymbol{}, false + } + return pcSymbolForFuncInfoIndex(pc, pc, frame.funcIndex) + } frames := runtimeFuncPCFrames if len(frames) == 0 { return pcSymbol{}, false From 5dcf83890575cdec132d931d9bb158b57a57334d Mon Sep 17 00:00:00 2001 From: Li Jie Date: Thu, 2 Jul 2026 20:41:48 +0800 Subject: [PATCH 42/59] =?UTF-8?q?build,runtime,pclnpost:=20P2=20complete?= =?UTF-8?q?=20=E2=80=94=20automatic=20link-phase=20functab?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Every llgo-linked executable (linux/darwin, sites enabled) now gets the prebuilt ftab/findfunctab automatically: internal/build runs internal/pclnpost.Rewrite after linkMainPkg, and any failure degrades silently to the first-use construction fallback. Moves the tool core into internal/pclnpost and hardens it: - Canonical-record detection by FNV: a record survives when its anchor's owning symbol hashes to the record's symbolID (or is the __llgo_stub. wrapper of it). The previous one-per-symbolID rule wrongly collapsed a function with its stub — they share the target's symbolID by design — which broke exact-entry lookups (caught by TestRuntimeLineInfoAndStack on Linux). LTO inline copies are now identified exactly: 8.4k/9.5k copies removed in the LTO probes. - Mach-O chained-fixups surgery: unlink the rewritten sections' pointer slots from the dyld page chains (repointing predecessors' next links and page_start entries) so dyld neither rebases slots inside the new table nor skips unrelated fixups after the zeroed stub section, then re-sign ad hoc. Without this the table was corrupted at load. 
- LTO-safe metadata location: the entry section carries a meta record whose relocations hold the addresses of the symbol-index pointer and count globals; LTO internalization strips those names from the symbol table but relocations always resolve. Runtime skips the meta rows (pc==0 / symbolID==0). - Idempotence guard (already-rewritten binaries are left alone). Runtime fixes that surfaced during validation: - materializePrebuiltEntries is now two-phase so concurrent losers wait for the winner's store instead of reading a nil entries slice. - pcLineFrameForPC rejects nearest-below sites whose entry is unresolved when the caller knows the function entry, instead of leaking a neighboring function's file/line. Validation: macOS cl (full) + test/go + LLDB 194/194; Linux test/go TestRuntime suite; probes on both platforms report entries=prebuilt with first-FuncForPC at 7-21µs (Linux) from 13ms on the original baseline, and LTO builds drop 8-9.5k inline copies. Co-Authored-By: Claude Fable 5 --- chore/pclnpost/main.go | 313 +------------------------ internal/build/build.go | 28 +++ internal/build/funcinfo_table.go | 20 ++ internal/pclnpost/binary.go | 283 ++++++++++++++++++++++ internal/pclnpost/fixups.go | 177 ++++++++++++++ internal/pclnpost/pclnpost.go | 69 ++++++ {chore => internal}/pclnpost/write.go | 103 +++++--- runtime/internal/lib/runtime/symtab.go | 42 +++- 8 files changed, 691 insertions(+), 344 deletions(-) create mode 100644 internal/pclnpost/binary.go create mode 100644 internal/pclnpost/fixups.go create mode 100644 internal/pclnpost/pclnpost.go rename {chore => internal}/pclnpost/write.go (67%) diff --git a/chore/pclnpost/main.go b/chore/pclnpost/main.go index 1c0ed7162d..22a7efe2c5 100644 --- a/chore/pclnpost/main.go +++ b/chore/pclnpost/main.go @@ -14,321 +14,30 @@ * limitations under the License. */ -// Command pclnpost is the P1 prototype of link-phase ftab/findfunctab -// generation (doc/design/pclntab-linkphase.md). 
It parses a linked LLGo -// binary's funcinfo site sections, deduplicates LTO inline copies against the -// symbol table, sorts the entries, builds the Go-layout findfunctab via -// internal/pclntab, and prints what the P2 build integration would write -// back. It performs no writes; its purpose is to prove the risky steps on -// real binaries. +// Command pclnpost rewrites a linked LLGo binary's funcinfo entry section +// with the link-phase prebuilt ftab/findfunctab (see +// doc/design/pclntab-linkphase.md and internal/pclnpost). llgo build runs +// the same rewrite automatically; this command exists for manual inspection +// and re-processing. package main import ( - "debug/elf" - "debug/macho" - "encoding/binary" - "flag" "fmt" "os" - "sort" - "github.com/goplus/llgo/internal/pclntab" + "github.com/goplus/llgo/internal/pclnpost" ) -type siteRecord struct { - pc uint64 - symbolID uint64 -} - -type textSym struct { - addr uint64 - size uint64 - name string -} - -type secInfo struct { - vmaddr uint64 - size uint64 - fileOff uint64 -} - -type binaryInfo struct { - format string - raw []byte - entrySec []byte - stubSec []byte - textStart uint64 - textEnd uint64 - imageBase uint64 - syms []textSym // sorted by addr, text symbols only - secs []secInfo - - entryVMAddr, entryVMSize, entryFileOff uint64 - stubVMSize, stubFileOff uint64 -} - -// readVM returns n bytes at a link-time virtual address. 
-func readVM(info *binaryInfo, addr uint64, n int) []byte { - for _, s := range info.secs { - if addr >= s.vmaddr && addr+uint64(n) <= s.vmaddr+s.size { - off := s.fileOff + (addr - s.vmaddr) - return info.raw[off : off+uint64(n)] - } - } - return make([]byte, n) -} - func main() { - verbose := flag.Bool("v", false, "print per-record details for dropped inline copies") - write := flag.Bool("write", false, "rewrite the entry section in place with the prebuilt table (P2)") - flag.Parse() - if flag.NArg() != 1 { - fmt.Fprintln(os.Stderr, "usage: pclnpost [-v] ") + if len(os.Args) != 2 { + fmt.Fprintln(os.Stderr, "usage: pclnpost ") os.Exit(2) } - path := flag.Arg(0) - info, err := load(path) + st, err := pclnpost.Rewrite(os.Args[1]) if err != nil { fmt.Fprintln(os.Stderr, "pclnpost:", err) os.Exit(1) } - fmt.Printf("format=%s text=[%#x,%#x) textSyms=%d\n", - info.format, info.textStart, info.textEnd, len(info.syms)) - - entries := parseRecords(info, info.entrySec) - stubs := parseRecords(info, info.stubSec) - fmt.Printf("entry records=%d stub records=%d\n", len(entries), len(stubs)) - - kept, droppedInline, droppedUnknown := dedupe(info, append(entries, stubs...), *verbose) - fmt.Printf("dedupe: kept=%d droppedInlineCopies=%d droppedNoSymbol=%d\n", - len(kept), droppedInline, droppedUnknown) - - ftab, base := buildFtab(info, kept) - buckets, err := pclntab.BuildFindFuncBuckets(ftab, uint32(info.textEnd-base)) - if err != nil { - fmt.Fprintln(os.Stderr, "pclnpost: BuildFindFuncBuckets:", err) - os.Exit(1) - } - fmt.Printf("ftab entries=%d (incl sentinel) findfunctab buckets=%d\n", len(ftab), len(buckets)) - - // Verify the Go-layout lookup answers every kept entry PC. 
- bad := 0 - for i, e := range ftab[:len(ftab)-1] { - if got := pclntab.LookupFuncIndex(ftab, buckets, e.EntryOff); got != i { - bad++ - } - } - fmt.Printf("lookup self-check: %d/%d entry PCs resolve to their own index\n", - len(ftab)-1-bad, len(ftab)-1) - - oldBytes := (len(entries) + len(stubs)) * 16 - newBytes := len(ftab)*8 + len(buckets)*20 - fmt.Printf("size: site sections %dB -> ftab+findfunctab %dB (%.1fx smaller)\n", - oldBytes, newBytes, float64(oldBytes)/float64(newBytes)) - if bad != 0 { - os.Exit(1) - } - if *write { - if err := writeBack(path, info, kept); err != nil { - fmt.Fprintln(os.Stderr, "pclnpost: write:", err) - os.Exit(1) - } - } -} - -func load(path string) (*binaryInfo, error) { - if mf, err := macho.Open(path); err == nil { - defer mf.Close() - info := &binaryInfo{format: "macho"} - info.raw, _ = os.ReadFile(path) - for _, s := range mf.Sections { - info.secs = append(info.secs, secInfo{vmaddr: s.Addr, size: s.Size, fileOff: uint64(s.Offset)}) - } - if s := mf.Section("__llgo_fie"); s != nil { - info.entrySec, _ = s.Data() - info.entryVMAddr, info.entryVMSize, info.entryFileOff = s.Addr, s.Size, uint64(s.Offset) - } - if s := mf.Section("__llgo_stub"); s != nil { - info.stubSec, _ = s.Data() - info.stubVMSize, info.stubFileOff = s.Size, uint64(s.Offset) - } - if s := mf.Section("__text"); s != nil { - info.textStart, info.textEnd = s.Addr, s.Addr+s.Size - info.imageBase = s.Addr &^ 0xFFFFFFF - } - if mf.Symtab != nil { - for _, sym := range mf.Symtab.Syms { - if sym.Value >= info.textStart && sym.Value < info.textEnd && sym.Name != "" { - info.syms = append(info.syms, textSym{addr: sym.Value, name: sym.Name}) - } - } - } - finish(info) - return info, nil - } - ef, err := elf.Open(path) - if err != nil { - return nil, fmt.Errorf("not Mach-O and not ELF: %w", err) - } - defer ef.Close() - info := &binaryInfo{format: "elf"} - info.raw, _ = os.ReadFile(path) - for _, s := range ef.Sections { - if s.Type != elf.SHT_NOBITS && s.Addr != 0 { - 
info.secs = append(info.secs, secInfo{vmaddr: s.Addr, size: s.Size, fileOff: s.Offset}) - } - } - if s := ef.Section("llgo_funcinfo_entry"); s != nil { - info.entrySec, _ = s.Data() - info.entryVMAddr, info.entryVMSize, info.entryFileOff = s.Addr, s.Size, s.Offset - } - if s := ef.Section("llgo_funcinfo_stubsite"); s != nil { - info.stubSec, _ = s.Data() - info.stubVMSize, info.stubFileOff = s.Size, s.Offset - } - if s := ef.Section(".text"); s != nil { - info.textStart, info.textEnd = s.Addr, s.Addr+s.Size - info.imageBase = s.Addr &^ 0xFFFFFFF - } - syms, _ := ef.Symbols() - for _, sym := range syms { - if elf.ST_TYPE(sym.Info) == elf.STT_FUNC && sym.Value >= info.textStart && sym.Value < info.textEnd { - info.syms = append(info.syms, textSym{addr: sym.Value, size: sym.Size, name: sym.Name}) - } - } - finish(info) - return info, nil -} - -func finish(info *binaryInfo) { - sort.Slice(info.syms, func(i, j int) bool { return info.syms[i].addr < info.syms[j].addr }) - // Collapse same-address aliases, then derive missing extents from the - // next distinct symbol start (Mach-O nlist carries no sizes; Go's linker - // uses the same next-start rule for its final ftab). 
- dedup := info.syms[:0] - for _, s := range info.syms { - if len(dedup) > 0 && dedup[len(dedup)-1].addr == s.addr { - continue - } - dedup = append(dedup, s) - } - info.syms = dedup - for i := range info.syms { - if info.syms[i].size == 0 { - if i+1 < len(info.syms) { - info.syms[i].size = info.syms[i+1].addr - info.syms[i].addr - } else { - info.syms[i].size = info.textEnd - info.syms[i].addr - } - } - } -} - -func parseRecords(info *binaryInfo, sec []byte) []siteRecord { - var out []siteRecord - for off := 0; off+16 <= len(sec); off += 16 { - pc := binary.LittleEndian.Uint64(sec[off:]) - id := binary.LittleEndian.Uint64(sec[off+8:]) - if pc == 0 || id == 0 { // zero keep-alive record - continue - } - // Mach-O pointer slots in the on-disk file hold dyld chained-fixup - // encodings (DYLD_CHAINED_PTR_64: target in the low 36 bits, chain - // metadata above); dyld rewrites them at load. Decode when the raw - // value falls outside the text range but its low 36 bits fall - // inside. The P2 write-back avoids the problem entirely by storing - // anchor-relative offsets instead of pointers. - if info.format == "macho" && (pc < info.textStart || pc >= info.textEnd) { - if t := pc & (1<<36 - 1); t >= info.textStart && t < info.textEnd { - pc = t - } - } - out = append(out, siteRecord{pc: pc, symbolID: id}) - } - return out -} - -// owner returns the text symbol containing addr. -func owner(info *binaryInfo, addr uint64) (textSym, bool) { - i := sort.Search(len(info.syms), func(i int) bool { return info.syms[i].addr > addr }) - if i == 0 { - return textSym{}, false - } - s := info.syms[i-1] - if addr >= s.addr+s.size { - return textSym{}, false - } - return s, true -} - -// dedupe keeps, per symbolID, only records whose anchor PC lies inside the -// text range of the symbol that emitted them: LTO inlining copies the -// body-embedded record into host functions, and those copies land inside a -// different symbol. 
Records whose owner cannot be determined are dropped -// conservatively (counted separately). -func dedupe(info *binaryInfo, recs []siteRecord, verbose bool) (kept []siteRecord, droppedInline, droppedUnknown int) { - type key struct { - id uint64 - pc uint64 - } - seen := make(map[key]bool, len(recs)) - byID := make(map[uint64]uint64, len(recs)) // symbolID -> kept owner addr - for _, r := range recs { - k := key{r.symbolID, r.pc} - if seen[k] { - continue - } - seen[k] = true - sym, ok := owner(info, r.pc) - if !ok { - droppedUnknown++ - continue - } - // The record's anchor was emitted at the entry of its own function; - // an inline copy sits in the middle of a host function whose entry - // already carries (or will carry) its own record. Keep the record - // whose anchor is closest to its owner's entry; one per symbolID. - if prev, dup := byID[r.symbolID]; dup { - if prev != sym.addr { - droppedInline++ - if verbose { - fmt.Printf(" inline copy: id=%#x pc=%#x inside %s\n", r.symbolID, r.pc, sym.name) - } - } - continue - } - // Heuristic for the canonical record: anchors are emitted before the - // first IR instruction, so the true record is within the prologue of - // its owner. Inline copies sit at arbitrary offsets. If this record - // is far from the owner's entry and another record for the same ID - // appears later, the map keeps the first seen; the self-check below - // still passes because every kept PC is normalized to its owner's - // entry address. - byID[r.symbolID] = sym.addr - kept = append(kept, siteRecord{pc: sym.addr, symbolID: r.symbolID}) - } - return kept, droppedInline, droppedUnknown -} - -// buildFtab returns the sorted table plus the base PC (Go's minpc): offsets -// are relative to the first recorded function so ftab[0].EntryOff == 0, as -// internal/pclntab requires. 
-func buildFtab(info *binaryInfo, kept []siteRecord) ([]pclntab.FuncTabEntry, uint64) { - sort.Slice(kept, func(i, j int) bool { return kept[i].pc < kept[j].pc }) - if len(kept) == 0 { - return nil, info.textStart - } - base := kept[0].pc - ftab := make([]pclntab.FuncTabEntry, 0, len(kept)+1) - prev := uint64(0) - for i, r := range kept { - if r.pc == prev { - continue // two symbolIDs at one entry (aliases); keep first - } - prev = r.pc - ftab = append(ftab, pclntab.FuncTabEntry{EntryOff: uint32(r.pc - base), FuncOff: uint32(i)}) - } - // Go-style sentinel at end of text. - ftab = append(ftab, pclntab.FuncTabEntry{EntryOff: uint32(info.textEnd - base), FuncOff: ^uint32(0)}) - return ftab, base + fmt.Printf("%s: entry=%d stub=%d kept=%d inlineCopies=%d noSymbol=%d -> ftab=%d buckets=%d\n", + st.Format, st.EntryRecords, st.StubRecords, st.Kept, st.InlineCopies, st.NoSymbol, st.FtabEntries, st.Buckets) } diff --git a/internal/build/build.go b/internal/build/build.go index 9af32042d7..64d7e331c6 100644 --- a/internal/build/build.go +++ b/internal/build/build.go @@ -52,6 +52,7 @@ import ( "github.com/goplus/llgo/internal/monitor" "github.com/goplus/llgo/internal/optlevel" "github.com/goplus/llgo/internal/packages" + "github.com/goplus/llgo/internal/pclnpost" "github.com/goplus/llgo/internal/typepatch" "github.com/goplus/llgo/ssa/abi" xenv "github.com/goplus/llgo/xtool/env" @@ -477,6 +478,7 @@ func Do(args []string, conf *Config) ([]Package, error) { if err != nil { return nil, err } + rewritePrebuiltFuncTab(ctx, outFmts.Out, verbose) if conf.Mode == ModeBuild && conf.SizeReport { if err := reportBinarySize(outFmts.Out, conf.SizeFormat, conf.SizeLevel, allPkgs); err != nil { fmt.Fprintf(os.Stderr, "Warning: size report failed: %v\n", err) @@ -970,6 +972,32 @@ func compileExtraFiles(ctx *context, verbose bool) ([]string, error) { return objFiles, nil } +// rewritePrebuiltFuncTab runs the link-phase prebuilt-table rewrite on the +// linked executable: it deduplicates LTO 
inline copies of the funcinfo entry
+// records against the symbol table and replaces the entry section with a
+// sorted ftab plus findfunctab that the runtime adopts zero-copy (see
+// internal/pclnpost and doc/design/pclntab-linkphase.md). Any failure leaves
+// the binary fully functional on the first-use construction fallback.
+func rewritePrebuiltFuncTab(ctx *context, out string, verbose bool) {
+	if ctx == nil || ctx.prog == nil || !ctx.prog.FuncInfoSitesEnabled() || !shouldEmitRuntimeSites(ctx) {
+		return
+	}
+	if ctx.buildConf.BuildMode != BuildModeExe {
+		return
+	}
+	st, err := pclnpost.Rewrite(out)
+	if err != nil {
+		if verbose {
+			fmt.Fprintf(os.Stderr, "llgo: prebuilt functab rewrite skipped: %v\n", err)
+		}
+		return
+	}
+	if verbose {
+		fmt.Fprintf(os.Stderr, "llgo: prebuilt functab: %d entries (%d LTO inline copies removed), %d buckets\n",
+			st.FtabEntries, st.InlineCopies, st.Buckets)
+	}
+}
+
 func linkMainPkg(ctx *context, pkg *packages.Package, pkgs []*aPackage, outputPath string, verbose bool) error {
 	needRuntime := false
 	needPyInit := false
diff --git a/internal/build/funcinfo_table.go b/internal/build/funcinfo_table.go
index 7bfbe6d636..ea57e7a865 100644
--- a/internal/build/funcinfo_table.go
+++ b/internal/build/funcinfo_table.go
@@ -865,6 +865,10 @@ func uint64Hex(v uint64) string {
+// funcInfoMetaRecordMagic marks the entry-section meta record consumed by
+// internal/pclnpost ("LLGOMET1" little-endian).
+const funcInfoMetaRecordMagic = uint64(0x3154454D4F474C4C)
+
 // emitRuntimeFuncInfoSites emits one zero record per used site section so the
 // section always exists and the linker-synthesized boundary symbols resolve
 // even when no package contributed records. Runtime skips zero records.
 func emitRuntimeFuncInfoSites(mod llvm.Module, pointerSize int, machO bool, pcSite bool, entrySite bool, stubSite bool) {
 	if !pcSite && !entrySite && !stubSite {
 		return
@@ -888,6 +892,22 @@ func emitRuntimeFuncInfoSites(mod llvm.Module, pointerSize int, machO bool, pcSi
 	}
 	if entrySite {
 		writeZeroRecord(entrySiteSectionInfo, "funcinfo_entry")
+		// Meta records for the link-phase tool: relocations carrying the
+		// addresses of the symbol-index pointer global and its count global.
+		// Relocations are resolved by the linker regardless of what LTO
+		// internalization does to the symbol table, which is what keeps this
+		// reachable in +LTO binaries. The runtime skips all three rows: the
+		// first has pc==0 and the other two have symbolID==0.
+		idxSym, cntSym := funcInfoSymbolIndexSymbol, funcInfoSymbolIndexCountSymbol
+		if machO {
+			idxSym, cntSym = "_"+idxSym, "_"+cntSym
+		}
+		asm.WriteString(ptrDirective + " 0\n")
+		asm.WriteString(".quad " + uint64Hex(funcInfoMetaRecordMagic) + "\n")
+		asm.WriteString(ptrDirective + " " + idxSym + "\n")
+		asm.WriteString(".quad 0\n")
+		asm.WriteString(ptrDirective + " " + cntSym + "\n")
+		asm.WriteString(".quad 0\n")
 	}
 	if stubSite {
 		writeZeroRecord(stubSiteSectionInfo, "funcinfo_stubsite")
diff --git a/internal/pclnpost/binary.go b/internal/pclnpost/binary.go
new file mode 100644
index 0000000000..c65c9eddbb
--- /dev/null
+++ b/internal/pclnpost/binary.go
@@ -0,0 +1,283 @@
+/*
+ * Copyright (c) 2026 The XGo Authors (xgo.dev). All rights reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// Package pclnpost implements link-phase ftab/findfunctab generation
+// (doc/design/pclntab-linkphase.md). It parses a linked LLGo binary's
+// funcinfo site sections, deduplicates LTO inline copies against the symbol
+// table, sorts the entries and builds the Go-layout findfunctab via
+// internal/pclntab. Rewrite writes the resulting prebuilt table back into
+// the binary's entry section; llgo build calls it after linking, and
+// chore/pclnpost exposes it as a command.
+package pclnpost
+
+import (
+	"debug/elf"
+	"debug/macho"
+	"encoding/binary"
+	"fmt"
+	"os"
+	"sort"
+
+	"github.com/goplus/llgo/internal/pclntab"
+)
+
+type siteRecord struct {
+	pc       uint64
+	symbolID uint64
+}
+
+type textSym struct {
+	addr uint64
+	size uint64
+	name string
+}
+
+type secInfo struct {
+	vmaddr  uint64
+	size    uint64
+	fileOff uint64
+}
+
+type binaryInfo struct {
+	format    string
+	raw       []byte
+	entrySec  []byte
+	stubSec   []byte
+	textStart uint64
+	textEnd   uint64
+	imageBase uint64
+	syms      []textSym // sorted by addr, text symbols only
+	secs      []secInfo
+
+	entryVMAddr, entryVMSize, entryFileOff uint64
+	stubVMSize, stubFileOff                uint64
+}
+
+// readVM returns n bytes at a link-time virtual address.
+func readVM(info *binaryInfo, addr uint64, n int) []byte {
+	for _, s := range info.secs {
+		if addr < s.vmaddr || addr+uint64(n) > s.vmaddr+s.size {
+			continue
+		}
+		off := s.fileOff + (addr - s.vmaddr)
+		// Only slice when the range really lies inside the file image; a
+		// section that claims bytes beyond it falls back to zeros instead
+		// of panicking.
+		if end := off + uint64(n); end >= off && end <= uint64(len(info.raw)) {
+			return info.raw[off:end]
+		}
+		break
+	}
+	// Addresses not backed by file bytes read as zeros.
+	return make([]byte, n)
+}
+
+// load opens path as Mach-O, falling back to ELF, and collects what the
+// rewrite needs: the raw file bytes, the section map used by readVM, the
+// funcinfo entry/stub site sections, the text bounds and the text symbols
+// (normalized by finish). It returns an error when the file is neither
+// format or cannot be read.
+func load(path string) (*binaryInfo, error) {
+	if mf, err := macho.Open(path); err == nil {
+		defer mf.Close()
+		info := &binaryInfo{format: "macho"}
+		// Without the raw bytes nothing can be read back or rewritten, so a
+		// read failure is reported instead of being carried as a nil slice.
+		raw, err := os.ReadFile(path)
+		if err != nil {
+			return nil, err
+		}
+		info.raw = raw
+		for _, s := range mf.Sections {
+			info.secs = append(info.secs, secInfo{vmaddr: s.Addr, size: s.Size, fileOff: uint64(s.Offset)})
+		}
+		if s := mf.Section("__llgo_fie"); s != nil {
+			info.entrySec, _ = s.Data()
+			info.entryVMAddr, info.entryVMSize, info.entryFileOff = s.Addr, s.Size, uint64(s.Offset)
+		}
+		if s := mf.Section("__llgo_stub"); s != nil {
+			info.stubSec, _ = s.Data()
+			info.stubVMSize, info.stubFileOff = s.Size, uint64(s.Offset)
+		}
+		if s := mf.Section("__text"); s != nil {
+			info.textStart, info.textEnd = s.Addr, s.Addr+s.Size
+			info.imageBase = s.Addr &^ 0xFFFFFFF
+		}
+		if mf.Symtab != nil {
+			for _, sym := range mf.Symtab.Syms {
+				if sym.Value >= info.textStart && sym.Value < info.textEnd && sym.Name != "" {
+					info.syms = append(info.syms, textSym{addr: sym.Value, name: sym.Name})
+				}
+			}
+		}
+		finish(info)
+		return info, nil
+	}
+	ef, err := elf.Open(path)
+	if err != nil {
+		return nil, fmt.Errorf("not Mach-O and not ELF: %w", err)
+	}
+	defer ef.Close()
+	info := &binaryInfo{format: "elf"}
+	raw, err := os.ReadFile(path)
+	if err != nil {
+		return nil, err
+	}
+	info.raw = raw
+	for _, s := range ef.Sections {
+		// Sections without file contents or without an address cannot be
+		// read through readVM.
+		if s.Type != elf.SHT_NOBITS && s.Addr != 0 {
+			info.secs = append(info.secs, secInfo{vmaddr: s.Addr, size: s.Size, fileOff: s.Offset})
+		}
+	}
+	if s := ef.Section("llgo_funcinfo_entry"); s != nil {
+		info.entrySec, _ = s.Data()
+		info.entryVMAddr, info.entryVMSize, info.entryFileOff = s.Addr, s.Size, s.Offset
+	}
+	if s := ef.Section("llgo_funcinfo_stubsite"); s != nil {
+		info.stubSec, _ = s.Data()
+		info.stubVMSize, info.stubFileOff = s.Size, s.Offset
+	}
+	if s := ef.Section(".text"); s != nil {
+		info.textStart, info.textEnd = s.Addr, s.Addr+s.Size
+		info.imageBase = s.Addr &^ 0xFFFFFFF
+	}
+	// A missing symbol table simply yields no text symbols.
+	syms, _ := ef.Symbols()
+	for _, sym := range syms {
+		if elf.ST_TYPE(sym.Info) == elf.STT_FUNC && sym.Value >= info.textStart && sym.Value < info.textEnd {
+			info.syms = append(info.syms, textSym{addr: sym.Value, size: sym.Size, name: sym.Name})
+		}
+	}
+	finish(info)
+	return info, nil
+}
+
+// finish sorts the collected text symbols by address, drops same-address
+// aliases and fills in sizes that the symbol table did not provide.
+func finish(info *binaryInfo) {
+	sort.Slice(info.syms, func(i, j int) bool { return info.syms[i].addr < info.syms[j].addr })
+	// Collapse same-address aliases, then derive missing extents from the
+	// next distinct symbol start (Mach-O nlist carries no sizes; Go's linker
+	// uses the same next-start rule for its final ftab).
+	dedup := info.syms[:0]
+	for _, s := range info.syms {
+		if len(dedup) > 0 && dedup[len(dedup)-1].addr == s.addr {
+			continue
+		}
+		dedup = append(dedup, s)
+	}
+	info.syms = dedup
+	for i := range info.syms {
+		if info.syms[i].size == 0 {
+			if i+1 < len(info.syms) {
+				info.syms[i].size = info.syms[i+1].addr - info.syms[i].addr
+			} else {
+				info.syms[i].size = info.textEnd - info.syms[i].addr
+			}
+		}
+	}
+}
+
+func parseRecords(info *binaryInfo, sec []byte) []siteRecord {
+	var out []siteRecord
+	for off := 0; off+16 <= len(sec); off += 16 {
+		pc := binary.LittleEndian.Uint64(sec[off:])
+		id := binary.LittleEndian.Uint64(sec[off+8:])
+		if pc == 0 || id == 0 { // zero keep-alive record
+			continue
+		}
+		// Mach-O pointer slots in the on-disk file hold dyld chained-fixup
+		// encodings (DYLD_CHAINED_PTR_64: target in the low 36 bits, chain
+		// metadata above); dyld rewrites them at load. Decode when the raw
+		// value falls outside the text range but its low 36 bits fall
+		// inside. The P2 write-back avoids the problem entirely by storing
+		// anchor-relative offsets instead of pointers.
+ if info.format == "macho" && (pc < info.textStart || pc >= info.textEnd) { + if t := pc & (1<<36 - 1); t >= info.textStart && t < info.textEnd { + pc = t + } + } + out = append(out, siteRecord{pc: pc, symbolID: id}) + } + return out +} + +// owner returns the text symbol containing addr. +func owner(info *binaryInfo, addr uint64) (textSym, bool) { + i := sort.Search(len(info.syms), func(i int) bool { return info.syms[i].addr > addr }) + if i == 0 { + return textSym{}, false + } + s := info.syms[i-1] + if addr >= s.addr+s.size { + return textSym{}, false + } + return s, true +} + +// fnv64 mirrors funcInfoSymbolID in internal/build/funcinfo_table.go. +func fnv64(name string) uint64 { + const offset = uint64(14695981039346656037) + const prime = uint64(1099511628211) + h := offset + for i := 0; i < len(name); i++ { + h ^= uint64(name[i]) + h *= prime + } + if h == 0 { + return 1 + } + return h +} + +const stubPrefix = "__llgo_stub." + +// dedupe keeps exactly the canonical record per emitting function: a record +// is canonical when the symbol that owns its anchor PC is the function the +// symbolID names (id == fnv64(owner)) or that function's closure stub +// (owner "__llgo_stub.X" with id == fnv64(X) — stubs share the target's +// symbolID by design). Everything else with a known owner is an LTO inline +// copy: inlining duplicated the body-embedded record into a host function. +// Kept records are normalized to their owner's true entry address. Records +// whose owner cannot be determined are dropped conservatively. 
+func dedupe(info *binaryInfo, recs []siteRecord, verbose bool) (kept []siteRecord, droppedInline, droppedUnknown int) { + seenOwner := make(map[uint64]bool, len(recs)) + for _, r := range recs { + sym, ok := owner(info, r.pc) + if !ok { + droppedUnknown++ + continue + } + name := sym.name + if info.format == "macho" && len(name) > 0 && name[0] == '_' { + name = name[1:] + } + target := name + if len(name) > len(stubPrefix) && name[:len(stubPrefix)] == stubPrefix { + target = name[len(stubPrefix):] + } + if fnv64(target) != r.symbolID { + droppedInline++ + if verbose { + fmt.Printf(" inline copy: id=%#x pc=%#x inside %s\n", r.symbolID, r.pc, sym.name) + } + continue + } + if seenOwner[sym.addr] { + continue + } + seenOwner[sym.addr] = true + kept = append(kept, siteRecord{pc: sym.addr, symbolID: r.symbolID}) + } + return kept, droppedInline, droppedUnknown +} + +// buildFtab returns the sorted table plus the base PC (Go's minpc): offsets +// are relative to the first recorded function so ftab[0].EntryOff == 0, as +// internal/pclntab requires. +func buildFtab(info *binaryInfo, kept []siteRecord) ([]pclntab.FuncTabEntry, uint64) { + sort.Slice(kept, func(i, j int) bool { return kept[i].pc < kept[j].pc }) + if len(kept) == 0 { + return nil, info.textStart + } + base := kept[0].pc + ftab := make([]pclntab.FuncTabEntry, 0, len(kept)+1) + prev := uint64(0) + for i, r := range kept { + if r.pc == prev { + continue // two symbolIDs at one entry (aliases); keep first + } + prev = r.pc + ftab = append(ftab, pclntab.FuncTabEntry{EntryOff: uint32(r.pc - base), FuncOff: uint32(i)}) + } + // Go-style sentinel at end of text. 
+ ftab = append(ftab, pclntab.FuncTabEntry{EntryOff: uint32(info.textEnd - base), FuncOff: ^uint32(0)}) + return ftab, base +} diff --git a/internal/pclnpost/fixups.go b/internal/pclnpost/fixups.go new file mode 100644 index 0000000000..46d61aff61 --- /dev/null +++ b/internal/pclnpost/fixups.go @@ -0,0 +1,177 @@ +/* + * Copyright (c) 2026 The XGo Authors (xgo.dev). All rights reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package pclnpost + +import ( + "encoding/binary" + "fmt" +) + +// Mach-O dyld chained fixups surgery. +// +// The on-disk pointer slots of the rewritten sections participate in dyld's +// chained-fixup page chains. If the chains are left untouched, dyld walks +// them at load time and rebases 8-byte slots inside our freshly written +// table, corrupting it — and terminating a chain early inside a zeroed +// section would also skip later fixups in the same page, corrupting +// unrelated data. unchainRanges removes every chain node that falls inside +// the given file-offset ranges: predecessors' next links (or the page_start +// table) are repointed to the first surviving successor. + +const ( + lcDyldChainedFixups = 0x80000034 + lcSegment64 = 0x19 + + chainedPtrStartNone = 0xFFFF + chainedPtrStartMulti = 0x8000 + + // pointer_format values with a 12-bit next field at bit 51, stride 4. 
+ chainedPtr64 = 2 + chainedPtr64Offset = 6 +) + +type segRange struct { + fileOff uint64 + fileSz uint64 +} + +// unchainRanges edits raw in place. ranges are file-offset [start, end) pairs. +func unchainRanges(raw []byte, ranges [][2]uint64) error { + inRange := func(off uint64) bool { + for _, r := range ranges { + if off >= r[0] && off < r[1] { + return true + } + } + return false + } + + // Locate LC_DYLD_CHAINED_FIXUPS and the segment table. + if len(raw) < 32 || binary.LittleEndian.Uint32(raw) != 0xFEEDFACF { + return fmt.Errorf("not a 64-bit little-endian Mach-O") + } + ncmds := binary.LittleEndian.Uint32(raw[16:]) + off := uint64(32) + var fixOff, fixSize uint64 + var segs []segRange + for i := uint32(0); i < ncmds; i++ { + cmd := binary.LittleEndian.Uint32(raw[off:]) + size := binary.LittleEndian.Uint32(raw[off+4:]) + switch cmd { + case lcDyldChainedFixups: + fixOff = uint64(binary.LittleEndian.Uint32(raw[off+8:])) + fixSize = uint64(binary.LittleEndian.Uint32(raw[off+12:])) + case lcSegment64: + segs = append(segs, segRange{ + fileOff: binary.LittleEndian.Uint64(raw[off+40:]), + fileSz: binary.LittleEndian.Uint64(raw[off+48:]), + }) + } + off += uint64(size) + } + if fixOff == 0 { + return nil // no chained fixups (classic dyld info); nothing to do + } + _ = fixSize + + hdr := raw[fixOff:] + startsOff := fixOff + uint64(binary.LittleEndian.Uint32(hdr[4:])) + segCount := binary.LittleEndian.Uint32(raw[startsOff:]) + if int(segCount) != len(segs) { + // seg_count counts all segments incl. ones without fixups; trust it. 
+ } + for si := uint32(0); si < segCount; si++ { + segInfoOff := binary.LittleEndian.Uint32(raw[startsOff+4+uint64(si)*4:]) + if segInfoOff == 0 { + continue + } + sOff := startsOff + uint64(segInfoOff) + pageSize := uint64(binary.LittleEndian.Uint16(raw[sOff+4:])) + ptrFormat := binary.LittleEndian.Uint16(raw[sOff+6:]) + pageCount := uint64(binary.LittleEndian.Uint16(raw[sOff+20:])) + if ptrFormat != chainedPtr64 && ptrFormat != chainedPtr64Offset { + // Only reject if this segment's pages intersect our ranges. + segFile := segs[si].fileOff + touches := false + for _, r := range ranges { + if r[0] < segFile+pageCount*pageSize && r[1] > segFile { + touches = true + } + } + if !touches { + continue + } + return fmt.Errorf("unsupported pointer_format %d", ptrFormat) + } + segFileOff := segs[si].fileOff + for pi := uint64(0); pi < pageCount; pi++ { + psOff := sOff + 22 + pi*2 + pStart := binary.LittleEndian.Uint16(raw[psOff:]) + if pStart == chainedPtrStartNone { + continue + } + if pStart&chainedPtrStartMulti != 0 { + return fmt.Errorf("multi-start pages not supported") + } + pageFile := segFileOff + pi*pageSize + // Collect the chain. + var nodes []uint64 + node := pageFile + uint64(pStart) + for { + nodes = append(nodes, node) + val := binary.LittleEndian.Uint64(raw[node:]) + next := (val >> 51) & 0xFFF + if next == 0 { + break + } + node += next * 4 + } + // Rebuild keeping only out-of-range nodes. 
+ var kept []uint64 + removed := 0 + for _, n := range nodes { + if inRange(n) { + removed++ + } else { + kept = append(kept, n) + } + } + if removed == 0 { + continue + } + if len(kept) == 0 { + binary.LittleEndian.PutUint16(raw[psOff:], chainedPtrStartNone) + continue + } + binary.LittleEndian.PutUint16(raw[psOff:], uint16(kept[0]-pageFile)) + for i, n := range kept { + val := binary.LittleEndian.Uint64(raw[n:]) + var next uint64 + if i+1 < len(kept) { + delta := kept[i+1] - n + if delta%4 != 0 || delta/4 > 0xFFF { + return fmt.Errorf("chain gap %d not encodable", delta) + } + next = delta / 4 + } + val = (val &^ (uint64(0xFFF) << 51)) | (next << 51) + binary.LittleEndian.PutUint64(raw[n:], val) + } + } + } + return nil +} diff --git a/internal/pclnpost/pclnpost.go b/internal/pclnpost/pclnpost.go new file mode 100644 index 0000000000..c65f815054 --- /dev/null +++ b/internal/pclnpost/pclnpost.go @@ -0,0 +1,69 @@ +/* + * Copyright (c) 2026 The XGo Authors (xgo.dev). All rights reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package pclnpost + +import ( + "encoding/binary" + "fmt" +) + +// Stats summarizes one rewrite. 
+type Stats struct { + Format string + EntryRecords int + StubRecords int + Kept int + InlineCopies int + NoSymbol int + FtabEntries int + Buckets int +} + +// Rewrite parses the linked binary's funcinfo site sections, deduplicates +// LTO inline copies against the symbol table, builds the Go-layout prebuilt +// table and rewrites the entry section in place (voiding the stub section). +// The runtime adopts the table when it sees the magic header and falls back +// to first-use construction otherwise, so failures here leave a fully +// functional binary. +func Rewrite(path string) (Stats, error) { + var st Stats + info, err := load(path) + if err != nil { + return st, err + } + st.Format = info.format + if len(info.entrySec) >= 8 && binary.LittleEndian.Uint64(info.entrySec) == prebuiltMagic { + return st, fmt.Errorf("already rewritten") + } + entries := parseRecords(info, info.entrySec) + stubs := parseRecords(info, info.stubSec) + st.EntryRecords, st.StubRecords = len(entries), len(stubs) + if len(entries) == 0 { + return st, fmt.Errorf("no entry records") + } + kept, inline, nosym := dedupe(info, append(entries, stubs...), false) + st.Kept, st.InlineCopies, st.NoSymbol = len(kept), inline, nosym + if len(kept) == 0 { + return st, fmt.Errorf("no records survived dedup") + } + ftab, buckets, err := writeBack(path, info, kept) + if err != nil { + return st, err + } + st.FtabEntries, st.Buckets = ftab, buckets + return st, nil +} diff --git a/chore/pclnpost/write.go b/internal/pclnpost/write.go similarity index 67% rename from chore/pclnpost/write.go rename to internal/pclnpost/write.go index cd269af6d2..b6249c333f 100644 --- a/chore/pclnpost/write.go +++ b/internal/pclnpost/write.go @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -package main +package pclnpost import ( "debug/elf" @@ -50,10 +50,10 @@ type symIndexEntry struct { // writeBack rewrites the entry-site section in place with the prebuilt table // and voids the stub section (its records are merged into the table). -func writeBack(path string, info *binaryInfo, kept []siteRecord) error { +func writeBack(path string, info *binaryInfo, kept []siteRecord) (ftabCount, bucketCount int, err error) { symIdx, err := loadSymbolIndex(path, info) if err != nil { - return err + return 0, 0, err } sort.Slice(kept, func(i, j int) bool { return kept[i].pc < kept[j].pc }) type row struct { @@ -74,7 +74,7 @@ func writeBack(path string, info *binaryInfo, kept []siteRecord) error { rows = append(rows, row{pc: r.pc, idx: idx}) } if len(rows) == 0 { - return fmt.Errorf("no resolvable entries") + return 0, 0, fmt.Errorf("no resolvable entries") } base := rows[0].pc count := len(rows) + 1 // + sentinel @@ -105,7 +105,7 @@ func writeBack(path string, info *binaryInfo, kept []siteRecord) error { subIdx := lastLE(bucketStart + uint64(s)*subbucketSize) delta := subIdx - baseIdx if delta < 0 || delta > 0xffff { - return fmt.Errorf("subbucket delta overflow: %d", delta) + return 0, 0, fmt.Errorf("subbucket delta overflow: %d", delta) } binary.LittleEndian.PutUint16(tmp[4+2*s:], uint16(delta)) } @@ -115,7 +115,7 @@ func writeBack(path string, info *binaryInfo, kept []siteRecord) error { need := 32 + count*8 + len(buckets) entrySize := int(info.entryVMSize) if need > entrySize { - return fmt.Errorf("prebuilt blob %dB does not fit entry section %dB", need, entrySize) + return 0, 0, fmt.Errorf("prebuilt blob %dB does not fit entry section %dB", need, entrySize) } blob := make([]byte, entrySize) // zero tail binary.LittleEndian.PutUint64(blob[0:], prebuiltMagic) @@ -134,41 +134,83 @@ func writeBack(path string, info *binaryInfo, kept []siteRecord) error { off += 8 copy(blob[off:], buckets) - f, err := os.OpenFile(path, os.O_RDWR, 0) - if err != nil { - return 
err - } - defer f.Close() - if _, err := f.WriteAt(blob, int64(info.entryFileOff)); err != nil { - return err + raw := make([]byte, len(info.raw)) + copy(raw, info.raw) + if info.format == "macho" { + // Remove the rewritten sections' pointer slots from dyld's chained + // fixup page chains first: otherwise dyld rebases 8-byte slots + // inside the new table at load time, and a chain terminating early + // inside the zeroed stub section would skip unrelated fixups later + // in the same page. + ranges := [][2]uint64{{info.entryFileOff, info.entryFileOff + info.entryVMSize}} + if info.stubVMSize > 0 { + ranges = append(ranges, [2]uint64{info.stubFileOff, info.stubFileOff + info.stubVMSize}) + } + if err := unchainRanges(raw, ranges); err != nil { + return 0, 0, fmt.Errorf("chained fixups: %w", err) + } } + copy(raw[info.entryFileOff:], blob) // Void the stub section: zero its records so the runtime's fallback scan // finds nothing (stub entries are already merged into the table above). if info.stubVMSize > 0 { - if _, err := f.WriteAt(make([]byte, int(info.stubVMSize)), int64(info.stubFileOff)); err != nil { - return err + zero := raw[info.stubFileOff : info.stubFileOff+info.stubVMSize] + for i := range zero { + zero[i] = 0 } } + st, err := os.Stat(path) + if err != nil { + return 0, 0, err + } + if err := os.WriteFile(path, raw, st.Mode()); err != nil { + return 0, 0, err + } if info.format == "macho" && runtime.GOOS == "darwin" { if out, err := exec.Command("codesign", "-f", "-s", "-", path).CombinedOutput(); err != nil { - return fmt.Errorf("codesign: %v: %s", err, out) + return 0, 0, fmt.Errorf("codesign: %v: %s", err, out) } } - fmt.Printf("write-back: ftab=%d buckets=%d blob=%dB into section of %dB\n", - count, len(buckets)/bucketBytes, need, entrySize) - return nil + return count, len(buckets) / bucketBytes, nil } -// loadSymbolIndex reads the {u64 symbolID, u32 funcIndex} table through the -// exported pointer globals (the data itself is a private symbol). 
-func loadSymbolIndex(path string, info *binaryInfo) ([]symIndexEntry, error) { - ptrAddr, err := symbolAddr(path, "__llgo_funcinfo_symbol_index") - if err != nil { - return nil, err +// metaRecordMagic marks the entry-section meta record ("LLGOMET1" LE); keep +// in sync with internal/build/funcinfo_table.go. +const metaRecordMagic = uint64(0x3154454D4F474C4C) + +// metaGlobalAddrs scans the raw entry section for the meta record and +// returns the link-time addresses of the symbol-index pointer global and its +// count global. Works in +LTO binaries where the symbols are internalized +// away, because the addresses come from relocations, not the symbol table. +func metaGlobalAddrs(info *binaryInfo) (idxPtr, cntPtr uint64, ok bool) { + sec := info.entrySec + for off := 0; off+48 <= len(sec); off += 16 { + pc := binary.LittleEndian.Uint64(sec[off:]) + id := binary.LittleEndian.Uint64(sec[off+8:]) + if pc == 0 && id == metaRecordMagic { + idxPtr = decodePtrVal(info, binary.LittleEndian.Uint64(sec[off+16:])) + cntPtr = decodePtrVal(info, binary.LittleEndian.Uint64(sec[off+32:])) + return idxPtr, cntPtr, idxPtr != 0 && cntPtr != 0 + } } - cntAddr, err := symbolAddr(path, "__llgo_funcinfo_symbol_index_count") - if err != nil { - return nil, err + return 0, 0, false +} + +// loadSymbolIndex reads the {u64 symbolID, u32 funcIndex} table, locating it +// through the entry-section meta record (LTO-safe) with the symbol table as +// fallback for older binaries. 
+func loadSymbolIndex(path string, info *binaryInfo) ([]symIndexEntry, error) { + ptrAddr, cntAddr, ok := metaGlobalAddrs(info) + if !ok { + var err error + ptrAddr, err = symbolAddr(path, "__llgo_funcinfo_symbol_index") + if err != nil { + return nil, err + } + cntAddr, err = symbolAddr(path, "__llgo_funcinfo_symbol_index_count") + if err != nil { + return nil, err + } } dataAddr := decodePtr(info, readVM(info, ptrAddr, 8)) count := binary.LittleEndian.Uint64(readVM(info, cntAddr, 8)) @@ -196,7 +238,10 @@ func lookupSymIndex(idx []symIndexEntry, id uint64) (uint32, bool) { // decodePtr resolves an on-disk pointer slot (Mach-O chained fixup or plain). func decodePtr(info *binaryInfo, b []byte) uint64 { - v := binary.LittleEndian.Uint64(b) + return decodePtrVal(info, binary.LittleEndian.Uint64(b)) +} + +func decodePtrVal(info *binaryInfo, v uint64) uint64 { if info.format == "macho" { if t := v & (1<<36 - 1); t != v && t >= info.imageBase { return t diff --git a/runtime/internal/lib/runtime/symtab.go b/runtime/internal/lib/runtime/symtab.go index 52508f0306..95957566f9 100644 --- a/runtime/internal/lib/runtime/symtab.go +++ b/runtime/internal/lib/runtime/symtab.go @@ -841,21 +841,33 @@ func prebuiltFrameCount() int { // materializePrebuiltEntries lazily builds the funcIndex -> entry map that // only the pcline initializer consumes; FuncForPC lookups never pay for it. +// Two-phase (busy/done) so concurrent losers wait for the winner's store. 
func materializePrebuiltEntries() { - if !latomic.CompareAndSwapUint32(&runtimePrebuiltEntriesOnce, 0, 1) { - return - } - entries := make([]uintptr, runtimeFuncInfoCount+1) - for _, e := range runtimePrebuiltFtab[:prebuiltFrameCount()] { - if e.funcIndex == 0 || uintptr(e.funcIndex) > runtimeFuncInfoCount { - continue - } - pc := runtimePrebuiltBase + uintptr(e.entryOff) - if entries[e.funcIndex] == 0 || pc < entries[e.funcIndex] { - entries[e.funcIndex] = pc + for { + switch latomic.LoadUint32(&runtimePrebuiltEntriesOnce) { + case 2: + return + case 0: + if !latomic.CompareAndSwapUint32(&runtimePrebuiltEntriesOnce, 0, 1) { + continue + } + entries := make([]uintptr, runtimeFuncInfoCount+1) + for _, e := range runtimePrebuiltFtab[:prebuiltFrameCount()] { + if e.funcIndex == 0 || uintptr(e.funcIndex) > runtimeFuncInfoCount { + continue + } + pc := runtimePrebuiltBase + uintptr(e.entryOff) + if entries[e.funcIndex] == 0 || pc < entries[e.funcIndex] { + entries[e.funcIndex] = pc + } + } + runtimeFuncPCEntries = entries + latomic.StoreUint32(&runtimePrebuiltEntriesOnce, 2) + return + default: + c.Usleep(1) } } - runtimeFuncPCEntries = entries } func initRuntimeFuncPCFramesOnce() { @@ -1769,7 +1781,11 @@ func pcLineFrameForPC(pc, entry uintptr) (pcSymbol, bool) { return pcSymbol{}, false } frame := frames[idx] - if entry != 0 && frame.entry != 0 && frame.entry != entry { + // When the caller knows the function entry, only accept a site from the + // same function. A site with an unresolved entry cannot prove it belongs + // to the queried function, so it must be rejected too — otherwise a + // nearest-below hit from a neighboring function leaks its file/line. 
+ if entry != 0 && frame.entry != entry { return pcSymbol{}, false } return pcSymbol{ From 781575a14ccd0fcece2b48c4c6142a5ddfc691a0 Mon Sep 17 00:00:00 2001 From: Li Jie Date: Thu, 2 Jul 2026 21:56:51 +0800 Subject: [PATCH 43/59] pclnpost: resolve Mach-O bind-encoded site records into the prebuilt table MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit On Mach-O, pointer slots that name exported functions — every __llgo_stub.* wrapper and any exported Go function — are emitted as chained-fixup BIND nodes, not rebases. The rewriter only decoded rebase nodes, so all stub records (and some entry records) were dropped as unowned and never reached the prebuilt ftab; FuncForPC on function values silently fell back to dladdr (~6µs per fresh pc on darwin). - Parse the LC_DYLD_CHAINED_FIXUPS imports table and resolve bind ordinals to their in-image definitions. - Match canonical owners against the record symbolID with underscore normalization (debug/macho's suffix-shared string table can surface one mangling underscore more or less than the source-level name). - Splice the prebuilt header's base slot back into the fixup chain as a live rebase node: dyld writes the slid text base at load, so the runtime reads a ready runtime PC with no slide arithmetic (non-PIE ELF link-time values already equal runtime addresses). - LLGO_PCLNPOST=0 escape hatch keeps first-use construction. Fresh-pc FuncForPC slow path: darwin 6-8µs -> 1.2-1.7µs, linux 6.8µs -> 0.5µs; first-in-process lookup: darwin ~32µs -> ~14µs, linux ~6.8µs -> ~4µs. 
Co-Authored-By: Claude Fable 5 --- internal/build/build.go | 3 + internal/pclnpost/binary.go | 155 ++++++++++++++++++++++--- internal/pclnpost/binary_test.go | 49 ++++++++ internal/pclnpost/fixups.go | 134 ++++++++++++++++----- internal/pclnpost/write.go | 17 ++- runtime/internal/lib/runtime/symtab.go | 17 +-- 6 files changed, 319 insertions(+), 56 deletions(-) create mode 100644 internal/pclnpost/binary_test.go diff --git a/internal/build/build.go b/internal/build/build.go index 64d7e331c6..db02fd67f3 100644 --- a/internal/build/build.go +++ b/internal/build/build.go @@ -985,6 +985,9 @@ func rewritePrebuiltFuncTab(ctx *context, out string, verbose bool) { if ctx.buildConf.BuildMode != BuildModeExe { return } + if os.Getenv("LLGO_PCLNPOST") == "0" { // escape hatch: keep first-use construction + return + } st, err := pclnpost.Rewrite(out) if err != nil { if verbose { diff --git a/internal/pclnpost/binary.go b/internal/pclnpost/binary.go index c65c9eddbb..0a9ce4a6e1 100644 --- a/internal/pclnpost/binary.go +++ b/internal/pclnpost/binary.go @@ -61,6 +61,11 @@ type binaryInfo struct { imageBase uint64 syms []textSym // sorted by addr, text symbols only secs []secInfo + // Mach-O chained-fixup import targets, ordinal -> resolved vmaddr (0 if + // the import name has no local definition). Exported symbols' pointer + // slots are emitted as BIND nodes even when they bind to this image, so + // record decoding needs the imports table, not just rebase decoding. + bindTargets []uint64 entryVMAddr, entryVMSize, entryFileOff uint64 stubVMSize, stubFileOff uint64 @@ -104,6 +109,7 @@ func load(path string) (*binaryInfo, error) { } } } + loadBindTargets(info, mf) finish(info) return info, nil } @@ -174,13 +180,23 @@ func parseRecords(info *binaryInfo, sec []byte) []siteRecord { continue } // Mach-O pointer slots in the on-disk file hold dyld chained-fixup - // encodings (DYLD_CHAINED_PTR_64: target in the low 36 bits, chain - // metadata above); dyld rewrites them at load. 
Decode when the raw - // value falls outside the text range but its low 36 bits fall - // inside. The P2 write-back avoids the problem entirely by storing - // anchor-relative offsets instead of pointers. + // encodings; dyld rewrites them at load. Rebase nodes + // (DYLD_CHAINED_PTR_64) carry the target in the low 36 bits. Anchors + // naming *exported* functions — every `__llgo_stub.*` and any + // exported Go function — are emitted as BIND nodes instead (bit 63 + // set, import ordinal in the low 24 bits, addend above), even though + // they bind back into this same image, so those resolve through the + // imports table. The P2 write-back avoids the problem entirely by + // storing anchor-relative offsets instead of pointers. if info.format == "macho" && (pc < info.textStart || pc >= info.textEnd) { - if t := pc & (1<<36 - 1); t >= info.textStart && t < info.textEnd { + if pc>>63 != 0 { // DYLD_CHAINED_PTR_64_BIND + ordinal := pc & (1<<24 - 1) + addend := (pc >> 24) & 0xFF + if ordinal >= uint64(len(info.bindTargets)) || info.bindTargets[ordinal] == 0 { + continue + } + pc = info.bindTargets[ordinal] + addend + } else if t := pc & (1<<36 - 1); t >= info.textStart && t < info.textEnd { pc = t } } @@ -219,6 +235,44 @@ func fnv64(name string) uint64 { const stubPrefix = "__llgo_stub." +// canonicalOwner reports whether owner symbol `name` is the function the +// record's symbolID names, or that function's `__llgo_stub.` wrapper. +// Mach-O symbol names carry a C-mangling underscore, and debug/macho's +// suffix-shared string table can surface one underscore more or less than +// the source-level name, so try each plausible normalization — matching a +// specific 64-bit FNV makes a false positive practically impossible. 
+func canonicalOwner(info *binaryInfo, name string, symbolID uint64) bool { + for { + cand := name + if len(cand) > len(stubPrefix) { + if i := stringIndex(cand, stubPrefix); i >= 0 { + cand = cand[i+len(stubPrefix):] + } + } + if fnv64(cand) == symbolID { + return true + } + if info.format == "macho" && len(name) > 1 && name[0] == '_' { + name = name[1:] + continue + } + return false + } +} + +func stringIndex(s, prefix string) int { + // prefix at the start, allowing for leading mangling underscores only + for i := 0; i+len(prefix) <= len(s) && i <= 2; i++ { + if s[i:i+len(prefix)] == prefix { + return i + } + if s[i] != '_' { + break + } + } + return -1 +} + // dedupe keeps exactly the canonical record per emitting function: a record // is canonical when the symbol that owns its anchor PC is the function the // symbolID names (id == fnv64(owner)) or that function's closure stub @@ -235,15 +289,7 @@ func dedupe(info *binaryInfo, recs []siteRecord, verbose bool) (kept []siteRecor droppedUnknown++ continue } - name := sym.name - if info.format == "macho" && len(name) > 0 && name[0] == '_' { - name = name[1:] - } - target := name - if len(name) > len(stubPrefix) && name[:len(stubPrefix)] == stubPrefix { - target = name[len(stubPrefix):] - } - if fnv64(target) != r.symbolID { + if !canonicalOwner(info, sym.name, r.symbolID) { droppedInline++ if verbose { fmt.Printf(" inline copy: id=%#x pc=%#x inside %s\n", r.symbolID, r.pc, sym.name) @@ -281,3 +327,82 @@ func buildFtab(info *binaryInfo, kept []siteRecord) ([]pclntab.FuncTabEntry, uin ftab = append(ftab, pclntab.FuncTabEntry{EntryOff: uint32(info.textEnd - base), FuncOff: ^uint32(0)}) return ftab, base } + +// loadBindTargets parses the LC_DYLD_CHAINED_FIXUPS imports table and +// resolves each import ordinal to the address of its local definition (this +// is a main executable: every funcinfo bind target is defined in-image). 
+func loadBindTargets(info *binaryInfo, mf *macho.File) { + raw := info.raw + if len(raw) < 32 { + return + } + ncmds := binary.LittleEndian.Uint32(raw[16:]) + var fixOff, fixSize uint64 + off := uint64(32) + for i := uint32(0); i < ncmds && off+8 <= uint64(len(raw)); i++ { + cmd := binary.LittleEndian.Uint32(raw[off:]) + size := binary.LittleEndian.Uint32(raw[off+4:]) + if cmd == lcDyldChainedFixups { + fixOff = uint64(binary.LittleEndian.Uint32(raw[off+8:])) + fixSize = uint64(binary.LittleEndian.Uint32(raw[off+12:])) + } + off += uint64(size) + } + if fixOff == 0 || fixOff+28 > uint64(len(raw)) { + return + } + hdr := raw[fixOff : fixOff+fixSize] + importsOff := binary.LittleEndian.Uint32(hdr[8:]) + symbolsOff := binary.LittleEndian.Uint32(hdr[12:]) + importsCount := binary.LittleEndian.Uint32(hdr[16:]) + importsFormat := binary.LittleEndian.Uint32(hdr[20:]) + if importsCount == 0 || importsCount > 1<<24 { + return + } + var stride, nameShift uint32 + switch importsFormat { + case 1: // DYLD_CHAINED_IMPORT: u32 {lib:8, weak:1, name_offset:23} + stride, nameShift = 4, 9 + case 2: // DYLD_CHAINED_IMPORT_ADDEND: {u32, i32 addend} + stride, nameShift = 8, 9 + default: // ADDEND64 or unknown: leave unresolved + return + } + byName := make(map[string]uint64, len(info.syms)) + if mf.Symtab != nil { + for _, sym := range mf.Symtab.Syms { + if sym.Value != 0 && sym.Name != "" { + byName[sym.Name] = sym.Value + } + } + } + cstr := func(b []byte) string { + for i, c := range b { + if c == 0 { + return string(b[:i]) + } + } + return string(b) + } + targets := make([]uint64, importsCount) + for i := uint32(0); i < importsCount; i++ { + rec := uint64(importsOff) + uint64(i*stride) + if rec+4 > uint64(len(hdr)) { + break + } + v := binary.LittleEndian.Uint32(hdr[rec:]) + nameOff := uint64(symbolsOff) + uint64(v>>nameShift) + if nameOff >= uint64(len(hdr)) { + continue + } + name := cstr(hdr[nameOff:]) + addr, ok := byName[name] + if !ok && len(name) > 1 && name[0] == '_' { + 
// debug/macho's Symtab names may carry one less mangling + // underscore than the import strings. + addr = byName[name[1:]] + } + targets[i] = addr + } + info.bindTargets = targets +} diff --git a/internal/pclnpost/binary_test.go b/internal/pclnpost/binary_test.go new file mode 100644 index 0000000000..3ea8508e65 --- /dev/null +++ b/internal/pclnpost/binary_test.go @@ -0,0 +1,49 @@ +/* + * Copyright (c) 2026 The XGo Authors (xgo.dev). All rights reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package pclnpost + +import "testing" + +func TestCanonicalOwner(t *testing.T) { + elf := &binaryInfo{format: "elf"} + macho := &binaryInfo{format: "macho"} + id := fnv64("example.com/p.F") + cases := []struct { + info *binaryInfo + name string + want bool + }{ + // ELF: symbol names are source-level. + {elf, "example.com/p.F", true}, + {elf, "__llgo_stub.example.com/p.F", true}, + {elf, "example.com/p.G", false}, + // Mach-O: one C-mangling underscore, and debug/macho's suffix-shared + // string table can surface one underscore more or less. + {macho, "_example.com/p.F", true}, + {macho, "example.com/p.F", true}, + {macho, "___llgo_stub.example.com/p.F", true}, + {macho, "__llgo_stub.example.com/p.F", true}, + // An LTO inline copy: record id names F but the owner is the host. 
+ {macho, "_example.com/p.Host", false}, + {macho, "___llgo_stub.example.com/p.G", false}, + } + for _, c := range cases { + if got := canonicalOwner(c.info, c.name, id); got != c.want { + t.Errorf("canonicalOwner(%s, %q) = %v, want %v", c.info.format, c.name, got, c.want) + } + } +} diff --git a/internal/pclnpost/fixups.go b/internal/pclnpost/fixups.go index 46d61aff61..8af02d419d 100644 --- a/internal/pclnpost/fixups.go +++ b/internal/pclnpost/fixups.go @@ -19,6 +19,7 @@ package pclnpost import ( "encoding/binary" "fmt" + "sort" ) // Mach-O dyld chained fixups surgery. @@ -47,10 +48,28 @@ const ( type segRange struct { fileOff uint64 fileSz uint64 + vmaddr uint64 } -// unchainRanges edits raw in place. ranges are file-offset [start, end) pairs. -func unchainRanges(raw []byte, ranges [][2]uint64) error { +// fixupInsert asks for a rebase fixup node at fileOff whose loaded value will +// be targetVM + slide. Slot bytes are returned as pending writes so the +// caller can apply them after overwriting the section contents. +type fixupInsert struct { + fileOff uint64 + targetVM uint64 +} + +type pendingWrite struct { + fileOff uint64 + val uint64 +} + +// unchainRanges edits chain metadata in raw in place: every chain node inside +// `ranges` (file-offset [start,end) pairs) is unlinked, and the requested +// `inserts` are spliced into the page chains as rebase nodes. Because insert +// slots usually lie inside a section the caller is about to overwrite, their +// encoded slot values are returned as pending writes to apply afterwards. +func unchainRanges(raw []byte, ranges [][2]uint64, inserts []fixupInsert) ([]pendingWrite, error) { inRange := func(off uint64) bool { for _, r := range ranges { if off >= r[0] && off < r[1] { @@ -62,7 +81,7 @@ func unchainRanges(raw []byte, ranges [][2]uint64) error { // Locate LC_DYLD_CHAINED_FIXUPS and the segment table. 
if len(raw) < 32 || binary.LittleEndian.Uint32(raw) != 0xFEEDFACF { - return fmt.Errorf("not a 64-bit little-endian Mach-O") + return nil, fmt.Errorf("not a 64-bit little-endian Mach-O") } ncmds := binary.LittleEndian.Uint32(raw[16:]) off := uint64(32) @@ -77,6 +96,7 @@ func unchainRanges(raw []byte, ranges [][2]uint64) error { fixSize = uint64(binary.LittleEndian.Uint32(raw[off+12:])) case lcSegment64: segs = append(segs, segRange{ + vmaddr: binary.LittleEndian.Uint64(raw[off+24:]), fileOff: binary.LittleEndian.Uint64(raw[off+40:]), fileSz: binary.LittleEndian.Uint64(raw[off+48:]), }) @@ -84,9 +104,20 @@ func unchainRanges(raw []byte, ranges [][2]uint64) error { off += uint64(size) } if fixOff == 0 { - return nil // no chained fixups (classic dyld info); nothing to do + if len(inserts) > 0 { + return nil, fmt.Errorf("no chained fixups to splice inserts into") + } + return nil, nil // no chained fixups (classic dyld info); nothing to do } _ = fixSize + imageBase := ^uint64(0) + for _, sg := range segs { + if sg.vmaddr != 0 && sg.vmaddr < imageBase { + imageBase = sg.vmaddr + } + } + var pending []pendingWrite + consumed := make(map[uint64]bool, len(inserts)) hdr := raw[fixOff:] startsOff := fixOff + uint64(binary.LittleEndian.Uint32(hdr[4:])) @@ -115,63 +146,102 @@ func unchainRanges(raw []byte, ranges [][2]uint64) error { if !touches { continue } - return fmt.Errorf("unsupported pointer_format %d", ptrFormat) + return nil, fmt.Errorf("unsupported pointer_format %d", ptrFormat) + } + encode := func(targetVM, next uint64) uint64 { + t := targetVM + if ptrFormat == chainedPtr64Offset { + t = targetVM - imageBase + } + return (t & (1<<36 - 1)) | (next << 51) } segFileOff := segs[si].fileOff for pi := uint64(0); pi < pageCount; pi++ { psOff := sOff + 22 + pi*2 + pageFile := segFileOff + pi*pageSize + pageEnd := pageFile + pageSize + // Inserts requested for this page. 
+ var ins []fixupInsert + for _, in := range inserts { + if in.fileOff >= pageFile && in.fileOff < pageEnd { + ins = append(ins, in) + consumed[in.fileOff] = true + } + } pStart := binary.LittleEndian.Uint16(raw[psOff:]) - if pStart == chainedPtrStartNone { + if pStart == chainedPtrStartNone && len(ins) == 0 { continue } - if pStart&chainedPtrStartMulti != 0 { - return fmt.Errorf("multi-start pages not supported") + if pStart != chainedPtrStartNone && pStart&chainedPtrStartMulti != 0 { + return nil, fmt.Errorf("multi-start pages not supported") } - pageFile := segFileOff + pi*pageSize - // Collect the chain. + // Collect the existing chain. var nodes []uint64 - node := pageFile + uint64(pStart) - for { - nodes = append(nodes, node) - val := binary.LittleEndian.Uint64(raw[node:]) - next := (val >> 51) & 0xFFF - if next == 0 { - break + if pStart != chainedPtrStartNone { + node := pageFile + uint64(pStart) + for { + nodes = append(nodes, node) + val := binary.LittleEndian.Uint64(raw[node:]) + next := (val >> 51) & 0xFFF + if next == 0 { + break + } + node += next * 4 } - node += next * 4 } - // Rebuild keeping only out-of-range nodes. - var kept []uint64 + // Rebuild: out-of-range survivors plus requested inserts. 
+ type finalNode struct { + off uint64 + insert bool + targetVM uint64 + } + var final []finalNode removed := 0 for _, n := range nodes { if inRange(n) { removed++ } else { - kept = append(kept, n) + final = append(final, finalNode{off: n}) } } - if removed == 0 { + for _, in := range ins { + final = append(final, finalNode{off: in.fileOff, insert: true, targetVM: in.targetVM}) + } + if removed == 0 && len(ins) == 0 { continue } - if len(kept) == 0 { + sort.Slice(final, func(i, j int) bool { return final[i].off < final[j].off }) + if len(final) == 0 { binary.LittleEndian.PutUint16(raw[psOff:], chainedPtrStartNone) continue } - binary.LittleEndian.PutUint16(raw[psOff:], uint16(kept[0]-pageFile)) - for i, n := range kept { - val := binary.LittleEndian.Uint64(raw[n:]) + binary.LittleEndian.PutUint16(raw[psOff:], uint16(final[0].off-pageFile)) + for i, n := range final { var next uint64 - if i+1 < len(kept) { - delta := kept[i+1] - n + if i+1 < len(final) { + delta := final[i+1].off - n.off if delta%4 != 0 || delta/4 > 0xFFF { - return fmt.Errorf("chain gap %d not encodable", delta) + return nil, fmt.Errorf("chain gap %d not encodable", delta) } next = delta / 4 } - val = (val &^ (uint64(0xFFF) << 51)) | (next << 51) - binary.LittleEndian.PutUint64(raw[n:], val) + if n.insert { + pending = append(pending, pendingWrite{fileOff: n.off, val: encode(n.targetVM, next)}) + } else { + val := binary.LittleEndian.Uint64(raw[n.off:]) + val = (val &^ (uint64(0xFFF) << 51)) | (next << 51) + binary.LittleEndian.PutUint64(raw[n.off:], val) + } } } } - return nil + // An unconsumed insert means its slot never joined a fixup chain; on a + // PIE binary the runtime would then read an unslid value. Fail loudly so + // the caller falls back to first-use construction instead. 
+ for _, in := range inserts { + if !consumed[in.fileOff] { + return nil, fmt.Errorf("fixup insert at %#x not within any chain page", in.fileOff) + } + } + return pending, nil } diff --git a/internal/pclnpost/write.go b/internal/pclnpost/write.go index b6249c333f..8ccd24d11c 100644 --- a/internal/pclnpost/write.go +++ b/internal/pclnpost/write.go @@ -30,10 +30,14 @@ import ( // Prebuilt blob layout — keep in sync with runtime/internal/lib/runtime // (runtimePrebuiltMagic and adoptPrebuiltFuncPCTable): // -// u64 magic "LLGOFTB1"; u64 linkSectAddr; u64 linkBase +// u64 magic "LLGOFTB1"; u64 linkSectAddr; u64 base // u32 count (incl sentinel); u32 bucketCount // count × {u32 entryOff, u32 funcIndex} // bucketCount × {u32 idx; 16 × u16 subbuckets} +// +// The base slot holds the runtime PC of the first table entry: on Mach-O it +// is re-linked into the dyld chained-fixup chain (dyld rebases it at load), +// on non-PIE ELF the link-time value already equals the runtime address. const prebuiltMagic = uint64(0x314254464F474C4C) const ( @@ -136,6 +140,7 @@ func writeBack(path string, info *binaryInfo, kept []siteRecord) (ftabCount, buc raw := make([]byte, len(info.raw)) copy(raw, info.raw) + var pending []pendingWrite if info.format == "macho" { // Remove the rewritten sections' pointer slots from dyld's chained // fixup page chains first: otherwise dyld rebases 8-byte slots @@ -146,11 +151,19 @@ func writeBack(path string, info *binaryInfo, kept []siteRecord) (ftabCount, buc if info.stubVMSize > 0 { ranges = append(ranges, [2]uint64{info.stubFileOff, info.stubFileOff + info.stubVMSize}) } - if err := unchainRanges(raw, ranges); err != nil { + // The header's base slot is spliced back into the chain as a live + // rebase node: dyld writes the *slid* text base there at load, so + // the runtime reads a ready runtime PC with no slide arithmetic. 
+ inserts := []fixupInsert{{fileOff: info.entryFileOff + 16, targetVM: base}} + pending, err = unchainRanges(raw, ranges, inserts) + if err != nil { return 0, 0, fmt.Errorf("chained fixups: %w", err) } } copy(raw[info.entryFileOff:], blob) + for _, pw := range pending { + binary.LittleEndian.PutUint64(raw[pw.fileOff:], pw.val) + } // Void the stub section: zero its records so the runtime's fallback scan // finds nothing (stub entries are already merged into the table above). if info.stubVMSize > 0 { diff --git a/runtime/internal/lib/runtime/symtab.go b/runtime/internal/lib/runtime/symtab.go index 95957566f9..ec8935c4aa 100644 --- a/runtime/internal/lib/runtime/symtab.go +++ b/runtime/internal/lib/runtime/symtab.go @@ -764,13 +764,19 @@ func initRuntimeFuncPCFramesSlow() { // 8-byte aligned at the section start: // // u64 magic "LLGOFTB1" -// u64 linkSectAddr link-time vmaddr of this section (slide anchor) -// u64 linkBase link-time PC of the first table entry +// u64 linkSectAddr link-time vmaddr of this section (informational) +// u64 base runtime PC of the first table entry // u32 count ftab entries incl. trailing sentinel // u32 bucketCount findfunctab buckets (runtime uint16 layout) -// count × {u32 entryOff /* relative to linkBase */, u32 funcIndex} +// count × {u32 entryOff /* relative to base */, u32 funcIndex} // bucketCount × {u32 idx; 16 × u16 subbuckets} // +// The base slot is a live relocation: on Mach-O the rewriter splices it back +// into the dyld chained-fixup page chain (so dyld both pre-touches the +// table's pages at load and writes the slid address), and on non-PIE ELF the +// link-time value already equals the runtime address. Either way the slot +// holds a runtime PC — no slide arithmetic here. +// // The tool sorts, deduplicates LTO inline copies against the symbol table, // and normalizes entries to true symbol starts, so adopting the table also // retires first-use sorting and the dlsym/stub fallbacks. 
@@ -805,8 +811,7 @@ func adoptPrebuiltFuncPCTable() bool { if *(*uint64)(unsafe.Pointer(start)) != runtimePrebuiltMagic { return false } - linkSectAddr := *(*uint64)(unsafe.Pointer(start + 8)) - linkBase := *(*uint64)(unsafe.Pointer(start + 16)) + base := uintptr(*(*uint64)(unsafe.Pointer(start + 16))) count := *(*uint32)(unsafe.Pointer(start + 24)) bucketCount := *(*uint32)(unsafe.Pointer(start + 28)) need := uintptr(runtimePrebuiltHeaderSize) + uintptr(count)*8 + @@ -814,8 +819,6 @@ func adoptPrebuiltFuncPCTable() bool { if count < 2 || end < start+need || uintptr(count) > runtimeFuncInfoCount*16+1 { return false } - slide := start - uintptr(linkSectAddr) - base := uintptr(linkBase) + slide runtimePrebuiltBase = base runtimePrebuiltFtab = unsafe.Slice((*runtimePrebuiltFtabEntry)(unsafe.Pointer(start+runtimePrebuiltHeaderSize)), count) runtimeFuncPCIndex = runtimePCFindIndex{ From 629a60490388eed162039e7f066f1b64018a2eb1 Mon Sep 17 00:00:00 2001 From: Li Jie Date: Thu, 2 Jul 2026 21:56:51 +0800 Subject: [PATCH 44/59] benchmark: add plain ordinary-code scenario to runtime_funcinfo Pure-compute probes (recursive fib, JSON round-trip, sort.Ints, map churn) with no runtime introspection, so one harness run covers both the introspection extremes and what the funcinfo machinery costs code that never asks for it. Co-Authored-By: Claude Fable 5 --- benchmark/runtime_funcinfo/README.md | 7 ++- benchmark/runtime_funcinfo/main.go | 91 +++++++++++++++++++++++++++- 2 files changed, 95 insertions(+), 3 deletions(-) diff --git a/benchmark/runtime_funcinfo/README.md b/benchmark/runtime_funcinfo/README.md index d80a73bc13..b473f94602 100644 --- a/benchmark/runtime_funcinfo/README.md +++ b/benchmark/runtime_funcinfo/README.md @@ -13,6 +13,9 @@ It covers: - cold first-use runtime metadata paths, including lazy table initialization. - a stdlib-heavy program with `encoding/json`, `text/template`, `regexp`, `go/parser`, `go/token`, and `net/netip` imports. 
+- ordinary code (`plain`): pure-compute probes (recursive `fib`, JSON + round-trip, `sort.Ints`, map churn) with no runtime introspection at all, + measuring what the funcinfo machinery costs code that never asks for it. Generated modules use `example.com/llgo-bench/...` import paths. This is intentional: LLGo does not enable caller-frame tracking for stdlib-shaped paths @@ -46,8 +49,8 @@ run. Output is written to `benchmark/runtime_funcinfo/out` by default: Performance cells are `best/trimmed avg` from process-level runs. The trimmed average drops one minimum and one maximum when at least three runs are present. `-iters` is a base iteration count: `hot` uses the full count, `deep` uses a -quarter, and `multipkg`/`stdlib` use one twentieth because each operation does -substantially more work. +quarter, `multipkg`/`stdlib` use one twentieth, and `plain` uses 1/2000 +because each operation does substantially more work. `multipkg.FuncForPCMany` and `multipkg.FileLineMany` are batch metrics over all generated target functions (`-packages * -methods`, 144 targets with the default diff --git a/benchmark/runtime_funcinfo/main.go b/benchmark/runtime_funcinfo/main.go index c23cb75fd0..f72220b669 100644 --- a/benchmark/runtime_funcinfo/main.go +++ b/benchmark/runtime_funcinfo/main.go @@ -84,7 +84,7 @@ func main() { runs := flag.Int("runs", 11, "process runs per executable") iters := flag.Int("iters", 200000, "inner benchmark iterations") llgoOpt := flag.String("llgo-opt", "2", "LLGo optimization level passed as -O; empty disables the flag") - scenarioList := flag.String("scenarios", "hot,deep,multipkg,cold,stdlib", "comma-separated scenarios") + scenarioList := flag.String("scenarios", "hot,deep,multipkg,cold,stdlib,plain", "comma-separated scenarios") includeLTO := flag.Bool("include-lto", false, "also build full-LTO variants for LLGo compilers") pkgCount := flag.Int("packages", 12, "generated package count for multipkg") methodCount := flag.Int("methods", 12, "generated 
functions and methods per generated package") @@ -244,6 +244,8 @@ func generateScenarios(workDir string, names []string, pkgCount, methodCount int err = generateCold(dir, size.Packages, size.Methods) case "stdlib": err = generateStdlib(dir) + case "plain": + err = generatePlain(dir) default: return nil, fmt.Errorf("unknown scenario %q", name) } @@ -532,6 +534,13 @@ func generateStdlib(dir string) error { return os.WriteFile(filepath.Join(dir, "main.go"), []byte(stdlibSource), 0644) } +func generatePlain(dir string) error { + if err := writeModule(dir, "example.com/llgo-bench/plain"); err != nil { + return err + } + return os.WriteFile(filepath.Join(dir, "main.go"), []byte(plainSource), 0644) +} + func buildScenario(outDir string, sc scenario, v variant, llgoOpt string) buildResult { bin := filepath.Join(outDir, "bin", safeName(v.Name)+"_"+sc.Name) if v.LTO { @@ -621,6 +630,8 @@ func iterationsForScenario(name string, base int) int { div = 4 case "multipkg", "cold", "stdlib": div = 20 + case "plain": + div = 2000 } n := base / div if n < 1 { @@ -1315,3 +1326,81 @@ func main() { fmt.Println("sink=", sinkInt, sinkString) } ` + +// plainSource is the ordinary-code scenario: pure compute with no runtime +// introspection at all. It exists to measure what the funcinfo machinery +// costs code that never asks for it (site-asm inline/layout perturbation is +// the only expected effect; the tables themselves are free until first use). 
+const plainSource = `package main + +import ( + "encoding/json" + "fmt" + "os" + "sort" + "time" +) + +var sinkInt int + +` + commonBenchHelpers + ` + +//go:noinline +func fib(n int) int { + if n < 2 { + return n + } + return fib(n-1) + fib(n-2) +} + +type item struct { + Name string + Value int + Tags []string +} + +func main() { + iters := benchIters(5) + + measure("plain.fib30", iters, func() { + sinkInt += fib(30) + }) + + items := make([]item, 2000) + for i := range items { + items[i] = item{Name: fmt.Sprintf("item-%d", i), Value: i * 7, Tags: []string{"a", "b", "c"}} + } + measure("plain.json", iters, func() { + b, err := json.Marshal(items) + if err != nil { + panic(err) + } + var out []item + if err := json.Unmarshal(b, &out); err != nil { + panic(err) + } + sinkInt += len(out) + }) + + measure("plain.sort", iters, func() { + data := make([]int, 200000) + for i := range data { + data[i] = (i*2654435761 + 12345) % 1000003 + } + sort.Ints(data) + sinkInt += data[0] + }) + + measure("plain.map", iters, func() { + m := make(map[int]int, 16) + for i := 0; i < 200000; i++ { + m[(i*2654435761)%100003]++ + } + sinkInt += len(m) + }) + + if sinkInt == 0 { + os.Exit(1) + } +} +` From 7ab0a847060cdfb2b659623b0bd672f6dc9716cd Mon Sep 17 00:00:00 2001 From: Li Jie Date: Thu, 2 Jul 2026 21:57:39 +0800 Subject: [PATCH 45/59] doc: record P3 findings in pclntab-linkphase design Co-Authored-By: Claude Fable 5 --- doc/design/pclntab-linkphase.md | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) diff --git a/doc/design/pclntab-linkphase.md b/doc/design/pclntab-linkphase.md index eb35f77d2a..cbbbaf776e 100644 --- a/doc/design/pclntab-linkphase.md +++ b/doc/design/pclntab-linkphase.md @@ -92,9 +92,15 @@ change is strictly additive and safe to land incrementally. step, wire the runtime fast path. Benchmarks: cold.FirstFuncForPC on both platforms; assert `llgo funcinfo: ... entries= prebuilt` via LLGO_FUNCINFO_DEBUG. 
-- **P3** Remove transitional runtime code (cold budget/scan, first-use sort - path stays as fallback but slack matching can go once anchors are exact - entries from the symbol table). +- **P3** (done) Mach-O bind-record resolution: pointer slots naming exported + functions — every `__llgo_stub.*` and any exported Go function — are + chained-fixup BIND nodes, not rebases; without decoding them through the + imports table, all stub records miss the prebuilt ftab and function-value + `FuncForPC` silently pays a dladdr per fresh pc (~6µs). Also: the prebuilt + header's base slot is spliced back into the fixup chain as a live rebase + node, so the runtime reads a dyld-slid runtime PC directly (no slide + arithmetic). Transitional cold budget/scan stays as the fallback for + non-rewritten binaries. - **P4** pcvalue-style line tables keyed by the prebuilt function order (replaces the call-site pcline records; gives instruction-level FileLine). @@ -112,6 +118,13 @@ change is strictly additive and safe to land incrementally. globals regardless, and `llvm.compiler.used` pins dead functions through the records' initializers. This is why records stay body-embedded inline asm and dedup happens post-link. +- Mach-O chained fixups encode anchors to exported symbols as BIND nodes + (import ordinal + addend), even when the target is defined in the same + image; only local-symbol anchors are rebases. Decode both. +- Adding fixup nodes does not pre-touch pages at load on modern macOS: + dyld uses page-in linking (the kernel applies fixups lazily at first + touch), so "sacrificial fixups to warm the table's pages" is not a + viable optimization — measured no effect on first-lookup latency. 
- `internal/pclntab` is a faithful port of Go 1.26's findfunctab generation and lookup (uint8 deltas, overflow error, forward scan, sentinel); the runtime's in-process variant deliberately uses uint16 deltas because LLGo From b8078d4c464d8432901e2bcbc44f9982cd2b444e Mon Sep 17 00:00:00 2001 From: Li Jie Date: Thu, 2 Jul 2026 22:14:23 +0800 Subject: [PATCH 46/59] benchmark: keep full LTO from constant-folding the plain fib probe Co-Authored-By: Claude Fable 5 --- benchmark/runtime_funcinfo/main.go | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/benchmark/runtime_funcinfo/main.go b/benchmark/runtime_funcinfo/main.go index f72220b669..b0d2625a96 100644 --- a/benchmark/runtime_funcinfo/main.go +++ b/benchmark/runtime_funcinfo/main.go @@ -1362,8 +1362,16 @@ type item struct { func main() { iters := benchIters(5) + // Read the depth from the environment so full LTO cannot constant-fold + // the whole call away. + fibN := 30 + if s := getenv("PLAIN_FIB_N"); s != "" { + if n, err := atoi(s); err == nil && n > 0 { + fibN = n + } + } measure("plain.fib30", iters, func() { - sinkInt += fib(30) + sinkInt += fib(fibN) }) items := make([]item, 2000) From 64ae3268c353ad8e6c5fed3eea834d4f38f3e37d Mon Sep 17 00:00:00 2001 From: Li Jie Date: Thu, 2 Jul 2026 22:34:18 +0800 Subject: [PATCH 47/59] runtime: pre-warm the prebuilt function table at startup MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Go's pclntab pages are touched by its own runtime (traceback, GC) long before user code queries it, so its first FuncForPC never pays page-in. Mirror that: when the prebuilt table is present, init adopts it (zero-copy, sub-µs), touches the pages the lookup path reads (blob, funcinfo records, string offsets, strings), runs one synthetic lookup to warm the code paths, and write-warms the FuncForPC cache pages. First-in-process FuncForPC: darwin ~17µs -> ~2.8µs, linux ~6.6µs -> ~1.0µs. 
Startup cost is page-count-bound (tens of µs on stdlib-sized tables, invisible next to ~3ms process startup; hello-world medians unchanged). Non-prebuilt binaries stay fully lazy: first-use construction allocates, which has no place in init, and programs that never introspect pay nothing. Co-Authored-By: Claude Fable 5 --- runtime/internal/lib/runtime/symtab.go | 63 ++++++++++++++++++++++++++ 1 file changed, 63 insertions(+) diff --git a/runtime/internal/lib/runtime/symtab.go b/runtime/internal/lib/runtime/symtab.go index ec8935c4aa..37d5606c38 100644 --- a/runtime/internal/lib/runtime/symtab.go +++ b/runtime/internal/lib/runtime/symtab.go @@ -1356,6 +1356,69 @@ func prebuiltFuncPCTablePresent() bool { return end >= start+8 && *(*uint64)(unsafe.Pointer(start)) == runtimePrebuiltMagic } +// runtimeFuncInfoWarmSink keeps the warm-up loads observable. +var runtimeFuncInfoWarmSink byte + +// init pre-warms the prebuilt function table, mirroring Go: Go's pclntab +// pages are touched by the runtime itself (traceback, GC) long before user +// code queries it, so its "first" FuncForPC never pays page-in. Touching the +// pages the lookup path reads — adopted blob, funcinfo records, string +// offsets, strings — moves first-touch page-in (plus, on darwin, +// code-signature validation) from the first user lookup to process startup +// (tens of µs once, on binaries that carry funcinfo tables). Without a +// prebuilt table everything stays lazy: first-use construction allocates, +// which has no place in init, and programs that never introspect pay +// nothing. 
+func init() { + if !prebuiltFuncPCTablePresent() { + return + } + initRuntimeFuncPCFrames() // zero-copy adoption, sub-µs + touch := func(base unsafe.Pointer, n uintptr) { + if base == nil || n == 0 { + return + } + const pageStep = 4096 + sink := runtimeFuncInfoWarmSink + p := uintptr(base) + for off := uintptr(0); off < n; off += pageStep { + sink += *(*byte)(unsafe.Pointer(p + off)) + } + sink += *(*byte)(unsafe.Pointer(p + n - 1)) + runtimeFuncInfoWarmSink = sink + } + start := uintptr(unsafe.Pointer(runtimeFuncInfoEntryStart)) + count := *(*uint32)(unsafe.Pointer(start + 24)) + bucketCount := *(*uint32)(unsafe.Pointer(start + 28)) + need := uintptr(runtimePrebuiltHeaderSize) + uintptr(count)*8 + + uintptr(bucketCount)*unsafe.Sizeof(runtimePCFindBucket{}) + touch(unsafe.Pointer(runtimeFuncInfoEntryStart), need) + touch(unsafe.Pointer(runtimeFuncInfoTable), + runtimeFuncInfoCount*unsafe.Sizeof(runtimeFuncInfoRecord{})) + touch(unsafe.Pointer(runtimeFuncInfoStringOffsets), + runtimeFuncInfoStringCount*unsafe.Sizeof(uint32(0))) + if runtimeFuncInfoStrings != nil && runtimeFuncInfoStringCount > 0 { + last := uintptr(*(*uint32)(unsafe.Add(unsafe.Pointer(runtimeFuncInfoStringOffsets), + (runtimeFuncInfoStringCount-1)*unsafe.Sizeof(uint32(0))))) + lastStr := funcInfoCString(uint16(runtimeFuncInfoStringCount - 1)) + touch(unsafe.Pointer(runtimeFuncInfoStrings), last+uintptr(cStringLen(lastStr))+1) + } + // One synthetic lookup warms the code paths themselves (allocator size + // classes, lookup caches), not just the data pages. + if prebuiltFrameCount() > 0 { + frame := prebuiltFrame(0) + if sym, ok := pcSymbolForFuncInfoIndex(frame.entry, frame.entry, frame.funcIndex); ok { + runtimeFuncInfoWarmSink += byte(len(sym.function)) + } + } + // Write-warm the FuncForPC cache: its first stores otherwise take + // zero-fill write faults, one per page, on the first few lookups. 
+ for i := 0; i < funcForPCCacheSets; i += 4096 / int(unsafe.Sizeof(funcForPCCache[0])) { + funcForPCCache[i][0].pc = 0 + } + funcForPCCache[funcForPCCacheSets-1][0].pc = 0 +} + func coldFuncInfoEntryLookup(pc uintptr) (pcSymbol, bool) { if pc == 0 || prebuiltFuncPCTablePresent() { return pcSymbol{}, false From 2565fd099a568c5f87ed3d3187ce6000a95c6475 Mon Sep 17 00:00:00 2001 From: Li Jie Date: Thu, 2 Jul 2026 22:38:04 +0800 Subject: [PATCH 48/59] =?UTF-8?q?benchmark:=20scale=20dimensions=20?= =?UTF-8?q?=E2=80=94=20call=20depth=20and=20big-method=20scenarios?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit -depths generates deep_ scenarios at configurable call depths; -bigsizes generates bigfunc scenarios (funcs x statements) whose large bodies stress statement-level pcline density, mid-function pc symbolization, and ordinary performance of big method bodies. Co-Authored-By: Claude Fable 5 --- benchmark/runtime_funcinfo/main.go | 204 +++++++++++++++++++++++++++-- 1 file changed, 193 insertions(+), 11 deletions(-) diff --git a/benchmark/runtime_funcinfo/main.go b/benchmark/runtime_funcinfo/main.go index b0d2625a96..618b78499a 100644 --- a/benchmark/runtime_funcinfo/main.go +++ b/benchmark/runtime_funcinfo/main.go @@ -89,6 +89,8 @@ func main() { pkgCount := flag.Int("packages", 12, "generated package count for multipkg") methodCount := flag.Int("methods", 12, "generated functions and methods per generated package") scaleList := flag.String("scales", "", "optional comma-separated package x method scales for multipkg/cold, for example 6x6,12x12,24x24") + depthList := flag.String("depths", "", "optional comma-separated call depths for the deep scenario, for example 32,128,512") + bigList := flag.String("bigsizes", "", "optional comma-separated funcs x statements sizes for the bigfunc scenario, for example 32x200,16x2000") flag.Var(&variants, "variant", "variant definition: name=go or name=llgo,/path/to/llgo,/path/to/root") 
flag.Parse() @@ -112,6 +114,14 @@ func main() { if err != nil { fatal(err) } + depths, err := parseInts(*depthList) + if err != nil { + fatal(err) + } + bigSizes, err := parseScalePairs(*bigList) + if err != nil { + fatal(err) + } absOut, err := filepath.Abs(*outDir) if err != nil { @@ -126,7 +136,7 @@ func main() { } } - scenarios, err := generateScenarios(filepath.Join(absOut, "work"), splitList(*scenarioList), *pkgCount, *methodCount, scales) + scenarios, err := generateScenarios(filepath.Join(absOut, "work"), splitList(*scenarioList), *pkgCount, *methodCount, scales, depths, bigSizes) if err != nil { fatal(err) } @@ -216,17 +226,35 @@ func parseVariants(values []string, includeLTO bool) ([]variant, error) { return out, nil } -func generateScenarios(workDir string, names []string, pkgCount, methodCount int, scales []scenarioSize) ([]scenario, error) { +func generateScenarios(workDir string, names []string, pkgCount, methodCount int, scales []scenarioSize, depths []int, bigSizes []scenarioSize) ([]scenario, error) { var out []scenario for _, name := range names { sizes := []scenarioSize{{Packages: pkgCount, Methods: methodCount}} if len(scales) != 0 && (name == "multipkg" || name == "cold") { sizes = scales } + if name == "deep" { + sizes = []scenarioSize{{Packages: 32}} + if len(depths) != 0 { + sizes = sizes[:0] + for _, d := range depths { + sizes = append(sizes, scenarioSize{Packages: d}) + } + } + } + if name == "bigfunc" { + sizes = []scenarioSize{{Packages: 32, Methods: 200}} + if len(bigSizes) != 0 { + sizes = bigSizes + } + } for _, size := range sizes { scenarioName := name if len(sizes) > 1 { scenarioName = fmt.Sprintf("%s_%dx%d", name, size.Packages, size.Methods) + if name == "deep" { + scenarioName = fmt.Sprintf("%s_%d", name, size.Packages) + } } dir := filepath.Join(workDir, scenarioName) if err := os.MkdirAll(dir, 0755); err != nil { @@ -237,7 +265,7 @@ func generateScenarios(workDir string, names []string, pkgCount, methodCount int case "hot": 
err = generateHot(dir) case "deep": - err = generateDeep(dir) + err = generateDeep(dir, size.Packages) case "multipkg": err = generateMultipkg(dir, size.Packages, size.Methods) case "cold": @@ -246,6 +274,8 @@ func generateScenarios(workDir string, names []string, pkgCount, methodCount int err = generateStdlib(dir) case "plain": err = generatePlain(dir) + case "bigfunc": + err = generateBigfunc(dir, size.Packages, size.Methods) default: return nil, fmt.Errorf("unknown scenario %q", name) } @@ -253,7 +283,7 @@ func generateScenarios(workDir string, names []string, pkgCount, methodCount int return nil, err } sc := scenario{Name: scenarioName, Kind: name, Dir: dir} - if name == "multipkg" || name == "cold" { + if name == "multipkg" || name == "cold" || name == "bigfunc" { sc.PackageCount = size.Packages sc.MethodCount = size.Methods sc.TargetCount = size.Packages * size.Methods @@ -265,6 +295,37 @@ func generateScenarios(workDir string, names []string, pkgCount, methodCount int return out, nil } +func parseInts(list string) ([]int, error) { + var out []int + for _, tok := range splitList(list) { + n, err := strconv.Atoi(tok) + if err != nil || n <= 0 { + return nil, fmt.Errorf("bad int %q", tok) + } + out = append(out, n) + } + return out, nil +} + +// parseScalePairs parses "AxB,CxD" lists that are not tied to the +// multipkg/cold flag defaults. 
+func parseScalePairs(list string) ([]scenarioSize, error) { + var out []scenarioSize + for _, tok := range splitList(list) { + a, b, ok := strings.Cut(tok, "x") + if !ok { + return nil, fmt.Errorf("bad size %q", tok) + } + f, err1 := strconv.Atoi(a) + st, err2 := strconv.Atoi(b) + if err1 != nil || err2 != nil || f <= 0 || st <= 0 { + return nil, fmt.Errorf("bad size %q", tok) + } + out = append(out, scenarioSize{Packages: f, Methods: st}) + } + return out, nil +} + func writeModule(dir, module string) error { return os.WriteFile(filepath.Join(dir, "go.mod"), []byte("module "+module+"\n\ngo 1.24\n"), 0644) } @@ -276,18 +337,18 @@ func generateHot(dir string) error { return os.WriteFile(filepath.Join(dir, "main.go"), []byte(hotSource), 0644) } -func generateDeep(dir string) error { +func generateDeep(dir string, depth int) error { if err := writeModule(dir, "example.com/llgo-bench/deep"); err != nil { return err } var b strings.Builder b.WriteString(deepPrefix) - for i := 0; i < 32; i++ { + for i := 0; i < depth; i++ { fmt.Fprintf(&b, "//go:noinline\nfunc frame%d() { frame%d() }\n\n", i, i+1) } - b.WriteString(`//go:noinline -func frame32() { - pc, file, line, ok := runtime.Caller(16) + fmt.Fprintf(&b, `//go:noinline +func frame%d() { + pc, file, line, ok := runtime.Caller(%d) if !ok || pc == 0 || file == "" || line == 0 { panic("bad deep caller") } @@ -296,8 +357,12 @@ func frame32() { sinkInt += line } -`) - b.WriteString(deepSuffix) +`, depth, depth/2) + suffix := strings.ReplaceAll(deepSuffix, "[64]uintptr", fmt.Sprintf("[%d]uintptr", depth+64)) + for _, m := range []string{"Direct", "Interface", "Closure"} { + suffix = strings.ReplaceAll(suffix, "deep."+m+"32", fmt.Sprintf("deep.%s%d", m, depth)) + } + b.WriteString(suffix) return os.WriteFile(filepath.Join(dir, "main.go"), []byte(b.String()), 0644) } @@ -534,6 +599,33 @@ func generateStdlib(dir string) error { return os.WriteFile(filepath.Join(dir, "main.go"), []byte(stdlibSource), 0644) } +// generateBigfunc 
emits `funcs` functions of `stmts` call-site statements +// each: large bodies stress statement-level pcline density (many sites per +// findfunctab bucket), mid-function pc symbolization, first-use pcline table +// construction at scale, and ordinary-code performance of big method bodies. +func generateBigfunc(dir string, funcs, stmts int) error { + if err := writeModule(dir, "example.com/llgo-bench/bigfunc"); err != nil { + return err + } + var b strings.Builder + b.WriteString(bigfuncPrefix) + for i := 0; i < funcs; i++ { + fmt.Fprintf(&b, "//go:noinline\nfunc big%03d(x int) int {\n", i) + for j := 0; j < stmts; j++ { + b.WriteString("\tx = leaf(x)\n") + } + b.WriteString("\tif captureBig {\n\t\tpc, _, line, ok := runtime.Caller(0)\n\t\tif !ok || line == 0 {\n\t\t\tpanic(\"bad big caller\")\n\t\t}\n\t\tbigPCs = append(bigPCs, pc)\n\t}\n") + b.WriteString("\treturn x\n}\n\n") + } + b.WriteString("//go:noinline\nfunc runAll(x int) int {\n") + for i := 0; i < funcs; i++ { + fmt.Fprintf(&b, "\tx = big%03d(x)\n", i) + } + b.WriteString("\treturn x\n}\n\n") + b.WriteString(bigfuncMain) + return os.WriteFile(filepath.Join(dir, "main.go"), []byte(b.String()), 0644) +} + func generatePlain(dir string) error { if err := writeModule(dir, "example.com/llgo-bench/plain"); err != nil { return err @@ -632,6 +724,8 @@ func iterationsForScenario(name string, base int) int { div = 20 case "plain": div = 2000 + case "bigfunc": + div = 100 } n := base / div if n < 1 { @@ -1412,3 +1506,91 @@ func main() { } } ` + +const bigfuncPrefix = `package main + +import ( + "fmt" + "os" + "runtime" + "time" +) + +var sinkInt int +var captureBig bool +var bigPCs []uintptr + +` + commonBenchHelpers + ` + +//go:noinline +func leaf(x int) int { return x + 1 } + +` + +const bigfuncMain = `func main() { + iters := benchIters(100) + + // Capture one tail-of-body pc per big function (a statement-level pc + // deep inside a large body, not a function entry). 
+ captureBig = true + sinkInt += runAll(1) + captureBig = false + if len(bigPCs) == 0 { + panic("no big pcs") + } + + // First statement-level FileLine in this process: includes any lazy + // line-table work over funcs*stmts call sites. + t := time.Now() + fn := runtime.FuncForPC(bigPCs[0]) + if fn == nil { + panic("no func") + } + file, line := fn.FileLine(bigPCs[0]) + if file == "" || line == 0 { + panic("bad first fileline") + } + fmt.Printf("bigfunc.FirstFileLineMid=%d\n", time.Since(t).Nanoseconds()) + + measure("bigfunc.FuncForPCMid", iters*100, func() { + for _, pc := range bigPCs { + if runtime.FuncForPC(pc) == nil { + panic("no func") + } + } + }) + + measure("bigfunc.FileLineMid", iters*100, func() { + for _, pc := range bigPCs { + f := runtime.FuncForPC(pc) + if f == nil { + panic("no func") + } + _, l := f.FileLine(pc) + sinkInt += l + } + }) + + measure("bigfunc.CallersFramesMid", iters*10, func() { + var pcs [16]uintptr + n := runtime.Callers(0, pcs[:]) + frames := runtime.CallersFrames(pcs[:n]) + for { + frame, more := frames.Next() + sinkInt += frame.Line + if !more { + break + } + } + }) + + // Ordinary performance of the large bodies themselves. + measure("bigfunc.Work", iters, func() { + sinkInt += runAll(1) + }) + + if sinkInt == 0 { + os.Exit(1) + } +} +` From 5e9f6b86a1f1fd4a47d8da0b02b0933448db3c9d Mon Sep 17 00:00:00 2001 From: Li Jie Date: Thu, 2 Jul 2026 23:42:22 +0800 Subject: [PATCH 49/59] pclnpost,runtime: fix two scale cliffs found by the 96x96 benchmarks MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Blob overflow: function-value stubs can double the row count, and at ~9k functions the prebuilt blob no longer fit the entry section, so the rewrite silently fell back to first-use construction (cold.FirstFuncForPC 96x96 non-LTO: 2.4ms). On overflow, retry with function entries only — stub pcs degrade to dladdr, real entries keep the prebuilt table. 
- FuncForPC cache thrash: the set-associative pc cache holds 4k entries; batch workloads over 9k+ distinct functions evicted constantly and paid the string-materializing slow path on every call (multipkg.FuncForPCMany 96x96: 8-11ms vs Go 172µs). Add a per-ftab-row *Func cache for exact-entry lookups, so batch lookups are O(binary search) after the first pass at any scale. Co-Authored-By: Claude Fable 5 --- internal/pclnpost/binary.go | 7 +++- internal/pclnpost/pclnpost.go | 13 ++++++++ internal/pclnpost/write.go | 7 +++- .../lib/runtime/pprof_runtime_stub_llgo.go | 25 +++++++++++++- runtime/internal/lib/runtime/symtab.go | 33 +++++++++++++++++++ 5 files changed, 82 insertions(+), 3 deletions(-) diff --git a/internal/pclnpost/binary.go b/internal/pclnpost/binary.go index 0a9ce4a6e1..5ddb712903 100644 --- a/internal/pclnpost/binary.go +++ b/internal/pclnpost/binary.go @@ -37,6 +37,7 @@ import ( type siteRecord struct { pc uint64 symbolID uint64 + stub bool // owner is a __llgo_stub.* wrapper } type textSym struct { @@ -260,6 +261,10 @@ func canonicalOwner(info *binaryInfo, name string, symbolID uint64) bool { } } +func isStubName(name string) bool { + return stringIndex(name, stubPrefix) >= 0 +} + func stringIndex(s, prefix string) int { // prefix at the start, allowing for leading mangling underscores only for i := 0; i+len(prefix) <= len(s) && i <= 2; i++ { @@ -300,7 +305,7 @@ func dedupe(info *binaryInfo, recs []siteRecord, verbose bool) (kept []siteRecor continue } seenOwner[sym.addr] = true - kept = append(kept, siteRecord{pc: sym.addr, symbolID: r.symbolID}) + kept = append(kept, siteRecord{pc: sym.addr, symbolID: r.symbolID, stub: isStubName(sym.name)}) } return kept, droppedInline, droppedUnknown } diff --git a/internal/pclnpost/pclnpost.go b/internal/pclnpost/pclnpost.go index c65f815054..c97974f260 100644 --- a/internal/pclnpost/pclnpost.go +++ b/internal/pclnpost/pclnpost.go @@ -61,6 +61,19 @@ func Rewrite(path string) (Stats, error) { return st, 
fmt.Errorf("no records survived dedup") } ftab, buckets, err := writeBack(path, info, kept) + if err == errBlobOverflow { + // Function-value stubs can double the row count; when the blob does + // not fit the entry section, keep real function entries (the common + // queries) and let stub pcs fall back to dladdr. + funcsOnly := kept[:0] + for _, r := range kept { + if !r.stub { + funcsOnly = append(funcsOnly, r) + } + } + st.Kept = len(funcsOnly) + ftab, buckets, err = writeBack(path, info, funcsOnly) + } if err != nil { return st, err } diff --git a/internal/pclnpost/write.go b/internal/pclnpost/write.go index 8ccd24d11c..fe89f051c0 100644 --- a/internal/pclnpost/write.go +++ b/internal/pclnpost/write.go @@ -20,6 +20,7 @@ import ( "debug/elf" "debug/macho" "encoding/binary" + "errors" "fmt" "os" "os/exec" @@ -40,6 +41,10 @@ import ( // on non-PIE ELF the link-time value already equals the runtime address. const prebuiltMagic = uint64(0x314254464F474C4C) +// errBlobOverflow reports that the prebuilt blob does not fit the entry +// section; the caller retries without stub rows before giving up. 
+var errBlobOverflow = errors.New("prebuilt blob does not fit entry section") + const ( bucketSize = 4096 subbucketCnt = 16 @@ -119,7 +124,7 @@ func writeBack(path string, info *binaryInfo, kept []siteRecord) (ftabCount, buc need := 32 + count*8 + len(buckets) entrySize := int(info.entryVMSize) if need > entrySize { - return 0, 0, fmt.Errorf("prebuilt blob %dB does not fit entry section %dB", need, entrySize) + return 0, 0, errBlobOverflow } blob := make([]byte, entrySize) // zero tail binary.LittleEndian.PutUint64(blob[0:], prebuiltMagic) diff --git a/runtime/internal/lib/runtime/pprof_runtime_stub_llgo.go b/runtime/internal/lib/runtime/pprof_runtime_stub_llgo.go index 3da596f877..a49217846f 100644 --- a/runtime/internal/lib/runtime/pprof_runtime_stub_llgo.go +++ b/runtime/internal/lib/runtime/pprof_runtime_stub_llgo.go @@ -2,7 +2,11 @@ package runtime -import llrt "github.com/goplus/llgo/runtime/internal/runtime" +import ( + "unsafe" + + llrt "github.com/goplus/llgo/runtime/internal/runtime" +) type StackRecord struct { Stack []uintptr @@ -145,6 +149,25 @@ func funcForPCSlow(pc uintptr) *Func { // backend prologue, so an exact entry PC may sort before its anchor. // Prefer the section table when it can match within the entry slack; // native symbol lookup is kept only as a fallback. + // Exact-entry lookups hit the per-ftab-row cache first: the pc cache + // thrashes when the queried function population outgrows it, and + // batch workloads (FuncForPC over every function) would otherwise + // pay the string-materializing slow path per call, forever. 
+ if runtimeFuncPCFramesBuilt() && runtimeFuncPCFramesPrebuilt { + if idx := prebuiltFrameIndexForEntry(pc); idx >= 0 { + if p := prebuiltFuncCacheLoad(idx); p != nil { + fn := (*Func)(p) + cacheFuncForPC(pc, fn) + return fn + } + if sym, ok := pcSymbolForFuncInfoIndex(pc, pc, prebuiltFrame(idx).funcIndex); ok { + fn := newFuncForPC(pc, sym) + prebuiltFuncCacheStore(idx, unsafe.Pointer(fn)) + cacheFuncForPC(pc, fn) + return fn + } + } + } if sym, ok := funcPCFrameForEntryPC(pc); ok { fn := newFuncForPC(pc, sym) cacheFuncForPC(pc, fn) diff --git a/runtime/internal/lib/runtime/symtab.go b/runtime/internal/lib/runtime/symtab.go index 37d5606c38..74a9a2d1cd 100644 --- a/runtime/internal/lib/runtime/symtab.go +++ b/runtime/internal/lib/runtime/symtab.go @@ -796,6 +796,38 @@ var runtimePrebuiltBase uintptr var runtimePrebuiltFtab []runtimePrebuiltFtabEntry var runtimePrebuiltEntriesOnce uint32 +// runtimePrebuiltFuncs caches one *Func per ftab row for exact-entry +// lookups. The set-associative pc cache in FuncForPC thrashes once the live +// pc population outgrows it (thousands of distinct functions queried in a +// loop); this cache is keyed by table row, so batch workloads stay O(search) +// after the first pass regardless of scale. Same benign-race model as the +// pc cache: word-sized pointer stores of identical values. +var runtimePrebuiltFuncs []unsafe.Pointer + +func prebuiltFuncCacheLoad(idx int) unsafe.Pointer { + if idx < 0 || idx >= len(runtimePrebuiltFuncs) { + return nil + } + return runtimePrebuiltFuncs[idx] +} + +func prebuiltFuncCacheStore(idx int, fn unsafe.Pointer) { + if idx < 0 || idx >= len(runtimePrebuiltFuncs) { + return + } + runtimePrebuiltFuncs[idx] = fn +} + +// prebuiltFrameIndexForEntry returns the ftab row whose entry is exactly pc, +// or -1. 
+func prebuiltFrameIndexForEntry(pc uintptr) int { + idx := prebuiltFrameIndex(pc) + if idx < 0 || prebuiltFrame(idx).entry != pc { + return -1 + } + return idx +} + // adoptPrebuiltFuncPCTable installs a zero-copy view over the prebuilt table // if the entry section carries the magic header. Returns false to fall back // to first-use construction. @@ -828,6 +860,7 @@ func adoptPrebuiltFuncPCTable() bool { runtimeFuncPCFramesPrebuilt = true runtimeFuncPCFramesFromSites = true runtimeFuncPCStubsFromSites = true + runtimePrebuiltFuncs = make([]unsafe.Pointer, count) return true } From d9873a0387e9ecc3b361d234ad71dbf70a1fee97 Mon Sep 17 00:00:00 2001 From: Li Jie Date: Fri, 3 Jul 2026 00:00:58 +0800 Subject: [PATCH 50/59] pclnpost,runtime: spill the prebuilt blob into the stub section on overflow Function-value stubs can push the row count past what the entry section holds (~9k functions with taken addresses). Instead of dropping stub rows, write the full blob into the (larger) stub section and leave a 32-byte redirect header ("LLGOFTB2" + a live-relocation pointer) in the entry section; the runtime follows it and adopts the same zero-copy view. Function-value lookups keep the prebuilt table at any scale instead of degrading to dladdr. Co-Authored-By: Claude Fable 5 --- internal/pclnpost/binary.go | 6 +-- internal/pclnpost/pclnpost.go | 6 ++- internal/pclnpost/write.go | 61 ++++++++++++++++++++------ runtime/internal/lib/runtime/symtab.go | 42 +++++++++++++++--- 4 files changed, 89 insertions(+), 26 deletions(-) diff --git a/internal/pclnpost/binary.go b/internal/pclnpost/binary.go index 5ddb712903..f79ee9c194 100644 --- a/internal/pclnpost/binary.go +++ b/internal/pclnpost/binary.go @@ -69,7 +69,7 @@ type binaryInfo struct { bindTargets []uint64 entryVMAddr, entryVMSize, entryFileOff uint64 - stubVMSize, stubFileOff uint64 + stubVMAddr, stubVMSize, stubFileOff uint64 } // readVM returns n bytes at a link-time virtual address. 
@@ -97,7 +97,7 @@ func load(path string) (*binaryInfo, error) { } if s := mf.Section("__llgo_stub"); s != nil { info.stubSec, _ = s.Data() - info.stubVMSize, info.stubFileOff = s.Size, uint64(s.Offset) + info.stubVMAddr, info.stubVMSize, info.stubFileOff = s.Addr, s.Size, uint64(s.Offset) } if s := mf.Section("__text"); s != nil { info.textStart, info.textEnd = s.Addr, s.Addr+s.Size @@ -132,7 +132,7 @@ func load(path string) (*binaryInfo, error) { } if s := ef.Section("llgo_funcinfo_stubsite"); s != nil { info.stubSec, _ = s.Data() - info.stubVMSize, info.stubFileOff = s.Size, s.Offset + info.stubVMAddr, info.stubVMSize, info.stubFileOff = s.Addr, s.Size, s.Offset } if s := ef.Section(".text"); s != nil { info.textStart, info.textEnd = s.Addr, s.Addr+s.Size diff --git a/internal/pclnpost/pclnpost.go b/internal/pclnpost/pclnpost.go index c97974f260..17c24e5567 100644 --- a/internal/pclnpost/pclnpost.go +++ b/internal/pclnpost/pclnpost.go @@ -46,8 +46,10 @@ func Rewrite(path string) (Stats, error) { return st, err } st.Format = info.format - if len(info.entrySec) >= 8 && binary.LittleEndian.Uint64(info.entrySec) == prebuiltMagic { - return st, fmt.Errorf("already rewritten") + if len(info.entrySec) >= 8 { + if m := binary.LittleEndian.Uint64(info.entrySec); m == prebuiltMagic || m == redirectMagic { + return st, fmt.Errorf("already rewritten") + } } entries := parseRecords(info, info.entrySec) stubs := parseRecords(info, info.stubSec) diff --git a/internal/pclnpost/write.go b/internal/pclnpost/write.go index fe89f051c0..8e848d6705 100644 --- a/internal/pclnpost/write.go +++ b/internal/pclnpost/write.go @@ -41,9 +41,18 @@ import ( // on non-PIE ELF the link-time value already equals the runtime address. const prebuiltMagic = uint64(0x314254464F474C4C) -// errBlobOverflow reports that the prebuilt blob does not fit the entry -// section; the caller retries without stub rows before giving up. 
-var errBlobOverflow = errors.New("prebuilt blob does not fit entry section") +// errBlobOverflow reports that the prebuilt blob fits neither the entry +// section nor the (larger) stub section; the caller retries without stub +// rows before giving up. +var errBlobOverflow = errors.New("prebuilt blob does not fit entry or stub section") + +// redirectMagic ("LLGOFTB2" little-endian) marks a 32-byte entry-section +// header whose third word points at the real blob, written into the stub +// section when the table outgrows the entry section (stub rows can double +// the count). The pointer slot is a live relocation like the in-place base +// slot: dyld rebases it on Mach-O, and non-PIE ELF link addresses already +// equal runtime addresses. +const redirectMagic = uint64(0x324254464F474C4C) const ( bucketSize = 4096 @@ -123,12 +132,21 @@ func writeBack(path string, info *binaryInfo, kept []siteRecord) (ftabCount, buc need := 32 + count*8 + len(buckets) entrySize := int(info.entryVMSize) - if need > entrySize { + spill := need > entrySize + if spill && need > int(info.stubVMSize) { return 0, 0, errBlobOverflow } - blob := make([]byte, entrySize) // zero tail + blobSect := int(info.entryVMSize) + blobFileOff := info.entryFileOff + blobVMAddr := info.entryVMAddr + if spill { + blobSect = int(info.stubVMSize) + blobFileOff = info.stubFileOff + blobVMAddr = info.stubVMAddr + } + blob := make([]byte, blobSect) // zero tail binary.LittleEndian.PutUint64(blob[0:], prebuiltMagic) - binary.LittleEndian.PutUint64(blob[8:], info.entryVMAddr) + binary.LittleEndian.PutUint64(blob[8:], blobVMAddr) binary.LittleEndian.PutUint64(blob[16:], base) binary.LittleEndian.PutUint32(blob[24:], uint32(count)) binary.LittleEndian.PutUint32(blob[28:], uint32(len(buckets)/bucketBytes)) @@ -156,22 +174,37 @@ func writeBack(path string, info *binaryInfo, kept []siteRecord) (ftabCount, buc if info.stubVMSize > 0 { ranges = append(ranges, [2]uint64{info.stubFileOff, info.stubFileOff + 
info.stubVMSize}) } - // The header's base slot is spliced back into the chain as a live - // rebase node: dyld writes the *slid* text base there at load, so - // the runtime reads a ready runtime PC with no slide arithmetic. - inserts := []fixupInsert{{fileOff: info.entryFileOff + 16, targetVM: base}} + // Pointer slots are spliced back into the chain as live rebase + // nodes: dyld writes *slid* addresses at load, so the runtime reads + // ready runtime pointers with no slide arithmetic. + inserts := []fixupInsert{{fileOff: blobFileOff + 16, targetVM: base}} + if spill { + inserts = append(inserts, fixupInsert{fileOff: info.entryFileOff + 16, targetVM: info.stubVMAddr}) + } pending, err = unchainRanges(raw, ranges, inserts) if err != nil { return 0, 0, fmt.Errorf("chained fixups: %w", err) } } - copy(raw[info.entryFileOff:], blob) + if spill { + // Entry section: zero + 32-byte redirect header to the stub-section + // blob. Zeroed records keep the runtime's fallback scans empty. + zero := raw[info.entryFileOff : info.entryFileOff+info.entryVMSize] + for i := range zero { + zero[i] = 0 + } + binary.LittleEndian.PutUint64(zero[0:], redirectMagic) + binary.LittleEndian.PutUint64(zero[8:], info.entryVMAddr) + binary.LittleEndian.PutUint64(zero[16:], info.stubVMAddr) + } + copy(raw[blobFileOff:], blob) for _, pw := range pending { binary.LittleEndian.PutUint64(raw[pw.fileOff:], pw.val) } - // Void the stub section: zero its records so the runtime's fallback scan - // finds nothing (stub entries are already merged into the table above). - if info.stubVMSize > 0 { + // Void the stub section when the blob lives in the entry section: zero + // its records so the runtime's fallback scan finds nothing (stub entries + // are already merged into the table above). 
+ if !spill && info.stubVMSize > 0 { zero := raw[info.stubFileOff : info.stubFileOff+info.stubVMSize] for i := range zero { zero[i] = 0 diff --git a/runtime/internal/lib/runtime/symtab.go b/runtime/internal/lib/runtime/symtab.go index 74a9a2d1cd..43fee60286 100644 --- a/runtime/internal/lib/runtime/symtab.go +++ b/runtime/internal/lib/runtime/symtab.go @@ -781,6 +781,10 @@ func initRuntimeFuncPCFramesSlow() { // and normalizes entries to true symbol starts, so adopting the table also // retires first-use sorting and the dlsym/stub fallbacks. const runtimePrebuiltMagic = uint64(0x314254464F474C4C) // "LLGOFTB1" little-endian +// "LLGOFTB2": the entry section holds only a 32-byte redirect whose third +// word is the runtime address of the real blob, written into the (larger) +// stub section when the table outgrew the entry section. +const runtimePrebuiltRedirectMagic = uint64(0x324254464F474C4C) const runtimePrebuiltHeaderSize = 8 + 8 + 8 + 4 + 4 type runtimePrebuiltFtabEntry struct { @@ -840,6 +844,20 @@ func adoptPrebuiltFuncPCTable() bool { if end < start+runtimePrebuiltHeaderSize { return false } + if *(*uint64)(unsafe.Pointer(start)) == runtimePrebuiltRedirectMagic { + // Blob spilled into the stub section; the pointer slot is a live + // relocation, so it already holds the runtime address. 
+ blob := uintptr(*(*uint64)(unsafe.Pointer(start + 16))) + if blob == 0 || runtimeFuncInfoStubSiteStart == nil || runtimeFuncInfoStubSiteEnd == nil { + return false + } + stubStart := uintptr(unsafe.Pointer(runtimeFuncInfoStubSiteStart)) + stubEnd := uintptr(unsafe.Pointer(runtimeFuncInfoStubSiteEnd)) + if blob != stubStart || stubEnd < stubStart { + return false + } + start, end = blob, stubEnd + } if *(*uint64)(unsafe.Pointer(start)) != runtimePrebuiltMagic { return false } @@ -1386,7 +1404,11 @@ func prebuiltFuncPCTablePresent() bool { } start := uintptr(unsafe.Pointer(runtimeFuncInfoEntryStart)) end := uintptr(unsafe.Pointer(runtimeFuncInfoEntryEnd)) - return end >= start+8 && *(*uint64)(unsafe.Pointer(start)) == runtimePrebuiltMagic + if end < start+8 { + return false + } + m := *(*uint64)(unsafe.Pointer(start)) + return m == runtimePrebuiltMagic || m == runtimePrebuiltRedirectMagic } // runtimeFuncInfoWarmSink keeps the warm-up loads observable. @@ -1407,6 +1429,9 @@ func init() { return } initRuntimeFuncPCFrames() // zero-copy adoption, sub-µs + if !runtimeFuncPCFramesPrebuilt { + return + } touch := func(base unsafe.Pointer, n uintptr) { if base == nil || n == 0 { return @@ -1420,12 +1445,15 @@ func init() { sink += *(*byte)(unsafe.Pointer(p + n - 1)) runtimeFuncInfoWarmSink = sink } - start := uintptr(unsafe.Pointer(runtimeFuncInfoEntryStart)) - count := *(*uint32)(unsafe.Pointer(start + 24)) - bucketCount := *(*uint32)(unsafe.Pointer(start + 28)) - need := uintptr(runtimePrebuiltHeaderSize) + uintptr(count)*8 + - uintptr(bucketCount)*unsafe.Sizeof(runtimePCFindBucket{}) - touch(unsafe.Pointer(runtimeFuncInfoEntryStart), need) + // The adopted blob may live in the entry section or (spilled) in the + // stub section; derive its range from the adopted views. 
+ if n := len(runtimePrebuiltFtab); n > 0 { + touch(unsafe.Pointer(&runtimePrebuiltFtab[0]), uintptr(n)*8) + } + if n := len(runtimeFuncPCIndex.buckets); n > 0 { + touch(unsafe.Pointer(&runtimeFuncPCIndex.buckets[0]), + uintptr(n)*unsafe.Sizeof(runtimePCFindBucket{})) + } touch(unsafe.Pointer(runtimeFuncInfoTable), runtimeFuncInfoCount*unsafe.Sizeof(runtimeFuncInfoRecord{})) touch(unsafe.Pointer(runtimeFuncInfoStringOffsets), From 41d6be09f61fa7668a205ac0744bd0738166836c Mon Sep 17 00:00:00 2001 From: Li Jie Date: Fri, 3 Jul 2026 09:24:29 +0800 Subject: [PATCH 51/59] runtime: exact-entry FuncForPC lookup must not require pc alignment funcForPCSlow treated any unaligned pc as a shadow-stack synthetic marker. arm64 function entries are always 4-aligned so this never fired, but amd64 function and stub entries need not be: an unaligned function-value pc skipped the prebuilt exact-entry path entirely and fell through to nearest-below symbolization, reporting the previous function's name (test/go TestRuntimeLineInfoAndStack on ubuntu CI, "bad function value func: main.renamedPC"). Hoist the prebuilt exact-entry + per-row-cache lookup ahead of the alignment heuristic; a genuine synthetic pc just misses the cheap search and proceeds as before. 
Co-Authored-By: Claude Fable 5 --- .../lib/runtime/pprof_runtime_stub_llgo.go | 38 +++++++++---------- 1 file changed, 19 insertions(+), 19 deletions(-) diff --git a/runtime/internal/lib/runtime/pprof_runtime_stub_llgo.go b/runtime/internal/lib/runtime/pprof_runtime_stub_llgo.go index a49217846f..e32a36160b 100644 --- a/runtime/internal/lib/runtime/pprof_runtime_stub_llgo.go +++ b/runtime/internal/lib/runtime/pprof_runtime_stub_llgo.go @@ -115,6 +115,25 @@ func FuncForPC(pc uintptr) *Func { } func funcForPCSlow(pc uintptr) *Func { + // Exact-entry lookup first, regardless of alignment: arm64 functions are + // always 4-aligned, but amd64 function and stub entries need not be, and + // an unaligned function-value pc must not be mistaken for a shadow-stack + // synthetic marker (a synthetic pc simply misses this cheap search). + if pc != 0 && runtimeFuncPCFramesBuilt() && runtimeFuncPCFramesPrebuilt { + if idx := prebuiltFrameIndexForEntry(pc); idx >= 0 { + if p := prebuiltFuncCacheLoad(idx); p != nil { + fn := (*Func)(p) + cacheFuncForPC(pc, fn) + return fn + } + if sym, ok := pcSymbolForFuncInfoIndex(pc, pc, prebuiltFrame(idx).funcIndex); ok { + fn := newFuncForPC(pc, sym) + prebuiltFuncCacheStore(idx, unsafe.Pointer(fn)) + cacheFuncForPC(pc, fn) + return fn + } + } + } if pc&3 != 0 { if sym := frameSymbol(pc); sym.ok { fn := newFuncForPC(pc, sym) @@ -149,25 +168,6 @@ func funcForPCSlow(pc uintptr) *Func { // backend prologue, so an exact entry PC may sort before its anchor. // Prefer the section table when it can match within the entry slack; // native symbol lookup is kept only as a fallback. - // Exact-entry lookups hit the per-ftab-row cache first: the pc cache - // thrashes when the queried function population outgrows it, and - // batch workloads (FuncForPC over every function) would otherwise - // pay the string-materializing slow path per call, forever. 
- if runtimeFuncPCFramesBuilt() && runtimeFuncPCFramesPrebuilt { - if idx := prebuiltFrameIndexForEntry(pc); idx >= 0 { - if p := prebuiltFuncCacheLoad(idx); p != nil { - fn := (*Func)(p) - cacheFuncForPC(pc, fn) - return fn - } - if sym, ok := pcSymbolForFuncInfoIndex(pc, pc, prebuiltFrame(idx).funcIndex); ok { - fn := newFuncForPC(pc, sym) - prebuiltFuncCacheStore(idx, unsafe.Pointer(fn)) - cacheFuncForPC(pc, fn) - return fn - } - } - } if sym, ok := funcPCFrameForEntryPC(pc); ok { fn := newFuncForPC(pc, sym) cacheFuncForPC(pc, fn) From 3a71cacc8b9b27589895097885aa4bf61f6fa335 Mon Sep 17 00:00:00 2001 From: Li Jie Date: Fri, 3 Jul 2026 10:23:25 +0800 Subject: [PATCH 52/59] pclnpost: never degrade to a table with stub gaps MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The overflow fallback dropped stub rows to fit the entry section. That leaves pc ranges the table claims to cover but does not: a function value whose stub falls in a gap resolves nearest-below to the previous function and silently returns the wrong name — exactly what ubuntu CI caught (amd64 --icf=safe layouts overflow by a few hundred bytes, and non-PIE ELF dladdr cannot rescue). If the blob fits neither the entry section nor the (larger) stub section, skip the rewrite entirely: first-use construction is slower but covers every record. Reproduced and verified on linux/amd64 (qemu): the stub pc had no exact row and nearest-below returned the neighbouring function's name. 
Co-Authored-By: Claude Fable 5 --- internal/pclnpost/binary.go | 7 +------ internal/pclnpost/pclnpost.go | 19 ++++++------------- 2 files changed, 7 insertions(+), 19 deletions(-) diff --git a/internal/pclnpost/binary.go b/internal/pclnpost/binary.go index f79ee9c194..ef88d8c28d 100644 --- a/internal/pclnpost/binary.go +++ b/internal/pclnpost/binary.go @@ -37,7 +37,6 @@ import ( type siteRecord struct { pc uint64 symbolID uint64 - stub bool // owner is a __llgo_stub.* wrapper } type textSym struct { @@ -261,10 +260,6 @@ func canonicalOwner(info *binaryInfo, name string, symbolID uint64) bool { } } -func isStubName(name string) bool { - return stringIndex(name, stubPrefix) >= 0 -} - func stringIndex(s, prefix string) int { // prefix at the start, allowing for leading mangling underscores only for i := 0; i+len(prefix) <= len(s) && i <= 2; i++ { @@ -305,7 +300,7 @@ func dedupe(info *binaryInfo, recs []siteRecord, verbose bool) (kept []siteRecor continue } seenOwner[sym.addr] = true - kept = append(kept, siteRecord{pc: sym.addr, symbolID: r.symbolID, stub: isStubName(sym.name)}) + kept = append(kept, siteRecord{pc: sym.addr, symbolID: r.symbolID}) } return kept, droppedInline, droppedUnknown } diff --git a/internal/pclnpost/pclnpost.go b/internal/pclnpost/pclnpost.go index 17c24e5567..053c9ed5fe 100644 --- a/internal/pclnpost/pclnpost.go +++ b/internal/pclnpost/pclnpost.go @@ -63,20 +63,13 @@ func Rewrite(path string) (Stats, error) { return st, fmt.Errorf("no records survived dedup") } ftab, buckets, err := writeBack(path, info, kept) - if err == errBlobOverflow { - // Function-value stubs can double the row count; when the blob does - // not fit the entry section, keep real function entries (the common - // queries) and let stub pcs fall back to dladdr. 
- funcsOnly := kept[:0] - for _, r := range kept { - if !r.stub { - funcsOnly = append(funcsOnly, r) - } - } - st.Kept = len(funcsOnly) - ftab, buckets, err = writeBack(path, info, funcsOnly) - } if err != nil { + // Includes errBlobOverflow when the blob fits neither the entry nor + // the stub section. Never drop stub rows to squeeze in: a table with + // gaps attributes pcs inside a gap to the previous function + // (nearest-below), which silently returns wrong names on platforms + // where dladdr cannot rescue (non-PIE ELF). First-use construction + // is slower but correct. return st, err } st.FtabEntries, st.Buckets = ftab, buckets From aa4ef2025485c30a7d43b94841196e6e2c013b94 Mon Sep 17 00:00:00 2001 From: Li Jie Date: Fri, 3 Jul 2026 10:32:44 +0800 Subject: [PATCH 53/59] pclnpost: unit-test record decoding, dedupe, ELF rewrite and fixup surgery Fabricated fixtures make the IO paths testable in-process: a minimal ELF exercises load/Rewrite end to end (in-place, stub-section spill, and the overflow fallback that must leave the binary untouched), and a synthetic Mach-O image drives the chained-fixup chain surgery (remove+splice, empty-page insert, unconsumed-insert error). Package coverage 16% -> 69%. Co-Authored-By: Claude Fable 5 --- internal/pclnpost/elf_fixture_test.go | 259 ++++++++++++++++++++++++++ internal/pclnpost/fixups_test.go | 146 +++++++++++++++ internal/pclnpost/logic_test.go | 116 ++++++++++++ 3 files changed, 521 insertions(+) create mode 100644 internal/pclnpost/elf_fixture_test.go create mode 100644 internal/pclnpost/fixups_test.go create mode 100644 internal/pclnpost/logic_test.go diff --git a/internal/pclnpost/elf_fixture_test.go b/internal/pclnpost/elf_fixture_test.go new file mode 100644 index 0000000000..3871dba03e --- /dev/null +++ b/internal/pclnpost/elf_fixture_test.go @@ -0,0 +1,259 @@ +/* + * Copyright (c) 2026 The XGo Authors (xgo.dev). All rights reserved. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package pclnpost + +import ( + "bytes" + "encoding/binary" + "os" + "path/filepath" + "testing" +) + +// buildELF fabricates the minimal ELF load() understands: .text, the two +// funcinfo site sections, a data section holding the symbol index, .symtab +// and .strtab. Layout is one flat file segment; vmaddr == file offset + 0x10000. +type elfFn struct { + name string + size uint64 +} + +func buildELF(t *testing.T, fns []elfFn, entryRecs, stubRecs func(addrOf func(string) uint64) []byte, entryPad, stubPad int) string { + t.Helper() + const base = uint64(0x10000) + var text bytes.Buffer + addr := map[string]uint64{} + for _, fn := range fns { + addr[fn.name] = base + uint64(text.Len()) + text.Write(make([]byte, fn.size)) + } + addrOf := func(n string) uint64 { return addr[n] } + entry := entryRecs(addrOf) + entry = append(entry, make([]byte, entryPad)...) + stub := stubRecs(addrOf) + stub = append(stub, make([]byte, stubPad)...) + + // Symbol index: sorted {u64 fnv(name), u32 funcIndex, u32 pad}. 
+ type sie struct { + id uint64 + idx uint32 + } + var idx []sie + for i, fn := range fns { + idx = append(idx, sie{fnv64(fn.name), uint32(i + 1)}) + } + for i := 0; i < len(idx); i++ { + for j := i + 1; j < len(idx); j++ { + if idx[j].id < idx[i].id { + idx[i], idx[j] = idx[j], idx[i] + } + } + } + var data bytes.Buffer + for _, e := range idx { + binary.Write(&data, binary.LittleEndian, e.id) + binary.Write(&data, binary.LittleEndian, e.idx) + binary.Write(&data, binary.LittleEndian, uint32(0)) + } + idxTableAddr := base + 0x8000 + // pointer global + count global at fixed addrs inside data section + ptrGlobal := idxTableAddr + uint64(data.Len()) + binary.Write(&data, binary.LittleEndian, idxTableAddr) + cntGlobal := idxTableAddr + uint64(data.Len()) + binary.Write(&data, binary.LittleEndian, uint64(len(idx))) + + // Entry section gets a meta record up front (pc=0 rows are skipped by + // parseRecords; the tool locates the index through them). + meta := append(rec(0, metaRecordMagic), rec(ptrGlobal, 0)...) + meta = append(meta, rec(cntGlobal, 0)...) + entry = append(meta, entry...) + + // strtab / symtab + strtab := []byte{0} + var symtab bytes.Buffer + symtab.Write(make([]byte, 24)) // null symbol + for _, fn := range fns { + nameOff := len(strtab) + strtab = append(strtab, fn.name...) + strtab = append(strtab, 0) + binary.Write(&symtab, binary.LittleEndian, uint32(nameOff)) + symtab.WriteByte(0x12) // GLOBAL FUNC + symtab.WriteByte(0) + binary.Write(&symtab, binary.LittleEndian, uint16(1)) // shndx .text + binary.Write(&symtab, binary.LittleEndian, addr[fn.name]) + binary.Write(&symtab, binary.LittleEndian, fn.size) + } + + shstr := []byte{0} + names := map[string]uint32{} + for _, n := range []string{".text", "llgo_funcinfo_entry", "llgo_funcinfo_stubsite", ".data", ".symtab", ".strtab", ".shstrtab"} { + names[n] = uint32(len(shstr)) + shstr = append(shstr, n...) 
+ shstr = append(shstr, 0) + } + + type sec struct { + name string + typ uint32 + addr uint64 + body []byte + link uint32 + entsz uint64 + } + secs := []sec{ + {".text", 1, base, text.Bytes(), 0, 0}, + {"llgo_funcinfo_entry", 1, base + 0x4000, entry, 0, 0}, + {"llgo_funcinfo_stubsite", 1, base + 0x6000, stub, 0, 0}, + {".data", 1, idxTableAddr, data.Bytes(), 0, 0}, + {".symtab", 2, 0, symtab.Bytes(), 6, 24}, + {".strtab", 3, 0, strtab, 0, 0}, + {".shstrtab", 3, 0, shstr, 0, 0}, + } + + var body bytes.Buffer + body.Write(make([]byte, 64)) // ELF header placeholder + offs := make([]uint64, len(secs)) + for i := range secs { + for body.Len()%16 != 0 { + body.WriteByte(0) + } + offs[i] = uint64(body.Len()) + body.Write(secs[i].body) + } + for body.Len()%16 != 0 { + body.WriteByte(0) + } + shoff := uint64(body.Len()) + // null section header + body.Write(make([]byte, 64)) + for i, s := range secs { + var sh [64]byte + binary.LittleEndian.PutUint32(sh[0:], names[s.name]) + binary.LittleEndian.PutUint32(sh[4:], s.typ) + binary.LittleEndian.PutUint64(sh[8:], 2 /*ALLOC*/) + binary.LittleEndian.PutUint64(sh[16:], s.addr) + binary.LittleEndian.PutUint64(sh[24:], offs[i]) + binary.LittleEndian.PutUint64(sh[32:], uint64(len(s.body))) + binary.LittleEndian.PutUint32(sh[40:], s.link) + binary.LittleEndian.PutUint64(sh[56:], s.entsz) + body.Write(sh[:]) + } + raw := body.Bytes() + copy(raw[0:], []byte{0x7f, 'E', 'L', 'F', 2, 1, 1, 0}) + binary.LittleEndian.PutUint16(raw[16:], 2) // EXEC + binary.LittleEndian.PutUint16(raw[18:], 0x3E) // x86-64 + binary.LittleEndian.PutUint32(raw[20:], 1) // version + binary.LittleEndian.PutUint64(raw[40:], shoff) // shoff + binary.LittleEndian.PutUint16(raw[52:], 64) // ehsize + binary.LittleEndian.PutUint16(raw[58:], 64) // shentsize + binary.LittleEndian.PutUint16(raw[60:], uint16(len(secs)+1)) // shnum + binary.LittleEndian.PutUint16(raw[62:], uint16(len(secs))) // shstrndx + + path := filepath.Join(t.TempDir(), "fixture") + if err := 
os.WriteFile(path, raw, 0755); err != nil { + t.Fatal(err) + } + return path +} + +func fixtureFns() []elfFn { + return []elfFn{ + {"example.com/p.A", 64}, + {"example.com/p.B", 64}, + {"__llgo_stub.example.com/p.A", 16}, + } +} + +func fixtureEntry(addrOf func(string) uint64) []byte { + out := rec(addrOf("example.com/p.A")+4, fnv64("example.com/p.A")) + return append(out, rec(addrOf("example.com/p.B")+4, fnv64("example.com/p.B"))...) +} + +func fixtureStub(addrOf func(string) uint64) []byte { + return rec(addrOf("__llgo_stub.example.com/p.A")+4, fnv64("example.com/p.A")) +} + +func TestRewriteELFInPlace(t *testing.T) { + path := buildELF(t, fixtureFns(), fixtureEntry, fixtureStub, 4096, 256) + st, err := Rewrite(path) + if err != nil { + t.Fatal(err) + } + if st.FtabEntries != 4 { // A, B, stub, sentinel + t.Fatalf("stats %+v", st) + } + // Idempotence guard. + if _, err := Rewrite(path); err == nil { + t.Fatal("expected already-rewritten error") + } + // Adoptable header with a plain runtime base on non-PIE ELF. + info, err := load(path) + if err != nil { + t.Fatal(err) + } + if got := binary.LittleEndian.Uint64(info.entrySec[0:]); got != prebuiltMagic { + t.Fatalf("magic %#x", got) + } + base := binary.LittleEndian.Uint64(info.entrySec[16:]) + if base != 0x10000 { // first function entry + t.Fatalf("base %#x", base) + } + // Stub section voided. + for _, b := range info.stubSec { + if b != 0 { + t.Fatal("stub section not zeroed") + } + } +} + +func TestRewriteELFSpillsToStubSection(t *testing.T) { + // Entry section too small for the blob, stub section large enough. 
+ path := buildELF(t, fixtureFns(), fixtureEntry, fixtureStub, 0, 8192) + st, err := Rewrite(path) + if err != nil { + t.Fatal(err) + } + if st.FtabEntries != 4 { + t.Fatalf("stats %+v", st) + } + info, err := load(path) + if err != nil { + t.Fatal(err) + } + if got := binary.LittleEndian.Uint64(info.entrySec[0:]); got != redirectMagic { + t.Fatalf("entry magic %#x", got) + } + if got := binary.LittleEndian.Uint64(info.stubSec[0:]); got != prebuiltMagic { + t.Fatalf("stub magic %#x", got) + } + if ptr := binary.LittleEndian.Uint64(info.entrySec[16:]); ptr != info.stubVMAddr { + t.Fatalf("redirect ptr %#x want %#x", ptr, info.stubVMAddr) + } +} + +func TestRewriteELFOverflowFallsBack(t *testing.T) { + // Neither section fits: Rewrite must fail (no gap-y table). + path := buildELF(t, fixtureFns(), fixtureEntry, fixtureStub, 0, 0) + before, _ := os.ReadFile(path) + if _, err := Rewrite(path); err == nil { + t.Fatal("expected overflow error") + } + after, _ := os.ReadFile(path) + if !bytes.Equal(before, after) { + t.Fatal("binary must be untouched on failure") + } +} diff --git a/internal/pclnpost/fixups_test.go b/internal/pclnpost/fixups_test.go new file mode 100644 index 0000000000..e2591c4a41 --- /dev/null +++ b/internal/pclnpost/fixups_test.go @@ -0,0 +1,146 @@ +/* + * Copyright (c) 2026 The XGo Authors (xgo.dev). All rights reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package pclnpost + +import ( + "encoding/binary" + "testing" +) + +// buildMachOForFixups fabricates the minimal Mach-O that unchainRanges +// parses: header, one LC_SEGMENT_64 covering one 16K page of payload, and an +// LC_DYLD_CHAINED_FIXUPS blob whose page chain holds `nodes` (file offsets, +// ascending, stride-4 aligned) encoded as DYLD_CHAINED_PTR_64 rebases. +func buildMachOForFixups(t *testing.T, nodes []uint64, targets []uint64) ([]byte, uint64) { + t.Helper() + const pageSize = 0x4000 + const segFileOff = uint64(0x8000) + raw := make([]byte, segFileOff+pageSize) + binary.LittleEndian.PutUint32(raw[0:], 0xFEEDFACF) + binary.LittleEndian.PutUint32(raw[16:], 2) // ncmds + // LC_SEGMENT_64 at 32 + off := 32 + binary.LittleEndian.PutUint32(raw[off:], 0x19) + binary.LittleEndian.PutUint32(raw[off+4:], 72) + binary.LittleEndian.PutUint64(raw[off+24:], 0x100000000) // vmaddr + binary.LittleEndian.PutUint64(raw[off+40:], segFileOff) // fileoff + binary.LittleEndian.PutUint64(raw[off+48:], pageSize) // filesize + // LC_DYLD_CHAINED_FIXUPS + off += 72 + fixOff := uint64(0x200) + binary.LittleEndian.PutUint32(raw[off:], 0x80000034) + binary.LittleEndian.PutUint32(raw[off+4:], 16) + binary.LittleEndian.PutUint32(raw[off+8:], uint32(fixOff)) + binary.LittleEndian.PutUint32(raw[off+12:], 0x100) + // dyld_chained_fixups_header + h := fixOff + binary.LittleEndian.PutUint32(raw[h+4:], 32) // starts_offset + // starts_in_image: seg_count=1, seg_info_offset[0]=8 + s := h + 32 + binary.LittleEndian.PutUint32(raw[s:], 1) + binary.LittleEndian.PutUint32(raw[s+4:], 8) + // starts_in_segment + g := s + 8 + binary.LittleEndian.PutUint16(raw[g+4:], 0x4000) // page_size + binary.LittleEndian.PutUint16(raw[g+6:], 2) // DYLD_CHAINED_PTR_64 + binary.LittleEndian.PutUint64(raw[g+8:], segFileOff) + binary.LittleEndian.PutUint16(raw[g+20:], 1) // page_count + if len(nodes) == 0 { + binary.LittleEndian.PutUint16(raw[g+22:], chainedPtrStartNone) + } else { + 
binary.LittleEndian.PutUint16(raw[g+22:], uint16(nodes[0]-segFileOff)) + } + for i, n := range nodes { + next := uint64(0) + if i+1 < len(nodes) { + next = (nodes[i+1] - n) / 4 + } + val := (next << 51) | (targets[i] & (1<<36 - 1)) + binary.LittleEndian.PutUint64(raw[n:], val) + } + return raw, segFileOff +} + +func chainNodes(t *testing.T, raw []byte, segFileOff uint64) []uint64 { + t.Helper() + g := uint64(0x200) + 32 + 8 + start := binary.LittleEndian.Uint16(raw[g+22:]) + if start == chainedPtrStartNone { + return nil + } + var out []uint64 + n := segFileOff + uint64(start) + for { + out = append(out, n) + next := (binary.LittleEndian.Uint64(raw[n:]) >> 51) & 0xFFF + if next == 0 { + return out + } + n += next * 4 + } +} + +func TestUnchainRangesRemovesAndSplices(t *testing.T) { + nodes := []uint64{0x8000, 0x8010, 0x8020, 0x8030} + targets := []uint64{0x100000000, 0x100000008, 0x100000010, 0x100000018} + raw, seg := buildMachOForFixups(t, nodes, targets) + // Remove the middle two, splice one insert into the removed range. 
+ pend, err := unchainRanges(raw, [][2]uint64{{0x8010, 0x8030}}, + []fixupInsert{{fileOff: 0x8018, targetVM: 0x100000abc}}) + if err != nil { + t.Fatal(err) + } + if len(pend) != 1 || pend[0].fileOff != 0x8018 { + t.Fatalf("pending %+v", pend) + } + binary.LittleEndian.PutUint64(raw[pend[0].fileOff:], pend[0].val) + got := chainNodes(t, raw, seg) + want := []uint64{0x8000, 0x8018, 0x8030} + if len(got) != len(want) { + t.Fatalf("chain %#v", got) + } + for i := range want { + if got[i] != want[i] { + t.Fatalf("chain %#v want %#v", got, want) + } + } + if v := binary.LittleEndian.Uint64(raw[0x8018:]) & (1<<36 - 1); v != 0x100000abc&(1<<36-1) { + t.Fatalf("insert target %#x", v) + } +} + +func TestUnchainRangesUnconsumedInsertFails(t *testing.T) { + nodes := []uint64{0x8000} + raw, _ := buildMachOForFixups(t, nodes, []uint64{0x100000000}) + if _, err := unchainRanges(raw, nil, []fixupInsert{{fileOff: 0x4, targetVM: 1}}); err == nil { + t.Fatal("expected unconsumed-insert error") + } +} + +func TestUnchainRangesEmptyPageGainsInsert(t *testing.T) { + raw, seg := buildMachOForFixups(t, nil, nil) + pend, err := unchainRanges(raw, nil, []fixupInsert{{fileOff: 0x8040, targetVM: 0x100000042}}) + if err != nil { + t.Fatal(err) + } + binary.LittleEndian.PutUint64(raw[pend[0].fileOff:], pend[0].val) + got := chainNodes(t, raw, seg) + if len(got) != 1 || got[0] != 0x8040 { + t.Fatalf("chain %#v", got) + } + _ = seg +} diff --git a/internal/pclnpost/logic_test.go b/internal/pclnpost/logic_test.go new file mode 100644 index 0000000000..a37f7932b5 --- /dev/null +++ b/internal/pclnpost/logic_test.go @@ -0,0 +1,116 @@ +/* + * Copyright (c) 2026 The XGo Authors (xgo.dev). All rights reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package pclnpost + +import ( + "encoding/binary" + "testing" +) + +func rec(pc, id uint64) []byte { + var b [16]byte + binary.LittleEndian.PutUint64(b[0:], pc) + binary.LittleEndian.PutUint64(b[8:], id) + return b[:] +} + +func TestParseRecordsELF(t *testing.T) { + info := &binaryInfo{format: "elf", textStart: 0x1000, textEnd: 0x2000} + sec := append(rec(0, 0), rec(0x1100, 7)...) // zero keep-alive skipped + sec = append(sec, rec(0x1200, 0)...) // id==0 skipped + sec = append(sec, rec(0x1300, 9)...) + got := parseRecords(info, sec) + if len(got) != 2 || got[0].pc != 0x1100 || got[1].symbolID != 9 { + t.Fatalf("got %+v", got) + } +} + +func TestParseRecordsMachO(t *testing.T) { + info := &binaryInfo{format: "macho", textStart: 0x100001000, textEnd: 0x100002000, + bindTargets: []uint64{0, 0x100001500}} + // Rebase-encoded slot: chain metadata above the low 36 bits. + rebase := (uint64(3) << 51) | 0x100001100 + // Bind-encoded slot: bit 63, ordinal 1, addend 4. + bind := (uint64(1) << 63) | (uint64(4) << 24) | 1 + // Bind to an unresolved ordinal is dropped. + badBind := (uint64(1) << 63) | 0 + sec := append(rec(rebase, 5), rec(bind, 6)...) + sec = append(sec, rec(badBind, 8)...) 
+ got := parseRecords(info, sec) + if len(got) != 2 { + t.Fatalf("got %+v", got) + } + if got[0].pc != 0x100001100 || got[1].pc != 0x100001504 { + t.Fatalf("decoded pcs %#x %#x", got[0].pc, got[1].pc) + } +} + +func TestDedupeCanonicalAndInline(t *testing.T) { + fn := "example.com/p.F" + host := "example.com/p.Host" + info := &binaryInfo{format: "elf", textStart: 0x1000, textEnd: 0x4000, syms: []textSym{ + {addr: 0x1000, size: 0x100, name: fn}, + {addr: 0x1100, size: 0x100, name: host}, + {addr: 0x1200, size: 0x10, name: "__llgo_stub." + fn}, + }} + id := fnv64(fn) + recs := []siteRecord{ + {pc: 0x1004, symbolID: id}, // canonical, inside F + {pc: 0x1104, symbolID: id}, // inline copy inside Host + {pc: 0x1204, symbolID: id}, // stub wrapper, canonical + {pc: 0x1008, symbolID: id}, // duplicate owner, collapsed + {pc: 0x9999, symbolID: id}, // no owner + } + kept, inline, nosym := dedupe(info, recs, false) + if len(kept) != 2 || inline != 1 || nosym != 1 { + t.Fatalf("kept=%d inline=%d nosym=%d", len(kept), inline, nosym) + } + if kept[0].pc != 0x1000 || kept[1].pc != 0x1200 { + t.Fatalf("normalized pcs %#x %#x", kept[0].pc, kept[1].pc) + } +} + +func TestBuildFtabSortsAndAppendsSentinel(t *testing.T) { + info := &binaryInfo{textStart: 0x1000, textEnd: 0x3000} + kept := []siteRecord{{pc: 0x2000, symbolID: 2}, {pc: 0x1100, symbolID: 1}, {pc: 0x2000, symbolID: 3}} + ftab, base := buildFtab(info, kept) + if base != 0x1100 || len(ftab) != 3 { + t.Fatalf("base=%#x len=%d", base, len(ftab)) + } + if ftab[0].EntryOff != 0 || ftab[1].EntryOff != 0xF00 || ftab[2].EntryOff != 0x3000-0x1100 { + t.Fatalf("offsets %+v", ftab) + } +} + +func TestOwnerLookup(t *testing.T) { + info := &binaryInfo{syms: []textSym{{addr: 0x100, size: 0x10, name: "a"}, {addr: 0x110, size: 0x10, name: "b"}}} + if s, ok := owner(info, 0x105); !ok || s.name != "a" { + t.Fatalf("got %+v ok=%v", s, ok) + } + if _, ok := owner(info, 0x50); ok { + t.Fatal("below first symbol should miss") + } + if _, ok := 
owner(info, 0x200); ok { + t.Fatal("past extent should miss") + } +} + +func TestFnv64NonZero(t *testing.T) { + if fnv64("") == 0 || fnv64("a") == fnv64("b") { + t.Fatal("fnv sanity") + } +} From fc3a21b057deda9e3cfa91590d2583dc6bd90915 Mon Sep 17 00:00:00 2001 From: Li Jie Date: Fri, 3 Jul 2026 14:55:10 +0800 Subject: [PATCH 54/59] pclnpost,build: Mach-O fixture tests; only re-sign signed binaries A fabricated Mach-O (segments, sections, symtab, chained-fixup imports and an empty page chain) drives load, bind-target resolution, record decoding and both Rewrite outcomes (in-place and stub-section spill) end to end. codesign now runs only when the input carries LC_CODE_SIGNATURE: real lld executables always do, unsigned inputs need no signature and codesign rejects them. Also cover asmQuoteELFSymbol, the empty-table initializers and the Rewrite error paths. Package coverage: pclnpost 69% -> 86%. Co-Authored-By: Claude Fable 5 --- internal/build/funcinfo_table_test.go | 41 ++++ internal/pclnpost/binary.go | 6 + internal/pclnpost/elf_fixture_test.go | 17 ++ internal/pclnpost/macho_fixture_test.go | 312 ++++++++++++++++++++++++ internal/pclnpost/write.go | 5 +- 5 files changed, 380 insertions(+), 1 deletion(-) create mode 100644 internal/pclnpost/macho_fixture_test.go diff --git a/internal/build/funcinfo_table_test.go b/internal/build/funcinfo_table_test.go index 3bb07a1c43..f5f11762cf 100644 --- a/internal/build/funcinfo_table_test.go +++ b/internal/build/funcinfo_table_test.go @@ -561,3 +561,44 @@ func TestFuncInfoTableEmissionMatrix(t *testing.T) { }) } } + +func TestAsmQuoteELFSymbol(t *testing.T) { + cases := map[string]string{ + `plain`: `"plain"`, + `we$ird`: `"we$$ird"`, + `q"uote`: `"q\"uote"`, + `back\slash`: `"back\\slash"`, + } + for in, want := range cases { + if got := asmQuoteELFSymbol(in); got != want { + t.Fatalf("quote(%q) = %q, want %q", in, got, want) + } + } +} + +// Empty encoded tables must materialize null initializers (the ~20-line +// branch 
in emitFuncInfoTable that only fires for funcinfo-less programs). +func TestFuncInfoTableEmptyEncodedInitializers(t *testing.T) { + prog := llssa.NewProgram(nil) + prog.EnableFuncInfoMetadata(true) + src := prog.NewPackage("example.com/p", "example.com/p") + ctx := &context{ + prog: prog, + buildConf: &Config{ + BuildMode: BuildModeExe, + Goos: "linux", + Goarch: "amd64", + }, + } + emitFuncInfoTable(ctx, src, nil, nil, nil) + ir := src.String() + for _, want := range []string{ + "@__llgo_funcinfo_table = global ptr null", + "@__llgo_pcline_table = global ptr null", + "@__llgo_funcinfo_count = global i64 0", + } { + if !strings.Contains(ir, want) { + t.Fatalf("missing %q in:\n%s", want, ir) + } + } +} diff --git a/internal/pclnpost/binary.go b/internal/pclnpost/binary.go index ef88d8c28d..1d9bdec8cd 100644 --- a/internal/pclnpost/binary.go +++ b/internal/pclnpost/binary.go @@ -69,6 +69,7 @@ type binaryInfo struct { entryVMAddr, entryVMSize, entryFileOff uint64 stubVMAddr, stubVMSize, stubFileOff uint64 + hasCodeSignature bool } // readVM returns n bytes at a link-time virtual address. @@ -109,6 +110,11 @@ func load(path string) (*binaryInfo, error) { } } } + for _, l := range mf.Loads { + if b := l.Raw(); len(b) >= 4 && binary.LittleEndian.Uint32(b) == 0x1D { // LC_CODE_SIGNATURE + info.hasCodeSignature = true + } + } loadBindTargets(info, mf) finish(info) return info, nil diff --git a/internal/pclnpost/elf_fixture_test.go b/internal/pclnpost/elf_fixture_test.go index 3871dba03e..e20511b84c 100644 --- a/internal/pclnpost/elf_fixture_test.go +++ b/internal/pclnpost/elf_fixture_test.go @@ -257,3 +257,20 @@ func TestRewriteELFOverflowFallsBack(t *testing.T) { t.Fatal("binary must be untouched on failure") } } + +func TestRewriteErrorPaths(t *testing.T) { + // No entry records at all. 
+ empty := func(addrOf func(string) uint64) []byte { return nil } + path := buildELF(t, fixtureFns(), empty, empty, 4096, 256) + if _, err := Rewrite(path); err == nil { + t.Fatal("expected no-entry-records error") + } + // Records whose anchors have no owning symbol: dropped, nothing survives. + orphan := func(addrOf func(string) uint64) []byte { + return rec(0xdead0000, 42) + } + path = buildELF(t, fixtureFns(), orphan, empty, 4096, 256) + if _, err := Rewrite(path); err == nil { + t.Fatal("expected no-survivors error") + } +} diff --git a/internal/pclnpost/macho_fixture_test.go b/internal/pclnpost/macho_fixture_test.go new file mode 100644 index 0000000000..32a931a3dc --- /dev/null +++ b/internal/pclnpost/macho_fixture_test.go @@ -0,0 +1,312 @@ +/* + * Copyright (c) 2026 The XGo Authors (xgo.dev). All rights reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package pclnpost + +import ( + "bytes" + "encoding/binary" + "os" + "path/filepath" + "testing" +) + +// buildMachO fabricates a minimal 64-bit Mach-O that debug/macho can Open: +// one __TEXT segment (__text) and one __DATA segment carrying __llgo_fie / +// __llgo_stub, plus LC_SYMTAB and an LC_DYLD_CHAINED_FIXUPS whose imports +// table binds ordinal 1 to a local symbol. 
+func buildMachO(t *testing.T, entry, stub []byte, syms []elfFn) string { + t.Helper() + const base = uint64(0x100000000) + text := make([]byte, 0x40000) // big enough that findfunctab buckets outgrow a tiny entry section + type lc struct{ b []byte } + var cmds []lc + + sect := func(name, seg string, addr, off, size uint64) []byte { + var b [80]byte + copy(b[0:], name) + copy(b[16:], seg) + binary.LittleEndian.PutUint64(b[32:], addr) + binary.LittleEndian.PutUint64(b[40:], size) + binary.LittleEndian.PutUint32(b[48:], uint32(off)) + return b[:] + } + segment := func(name string, vmaddr, fileoff, filesz uint64, sects [][]byte) []byte { + var h [72]byte + binary.LittleEndian.PutUint32(h[0:], 0x19) // LC_SEGMENT_64 + binary.LittleEndian.PutUint32(h[4:], uint32(72+80*len(sects))) + copy(h[8:], name) + binary.LittleEndian.PutUint64(h[24:], vmaddr) + binary.LittleEndian.PutUint64(h[32:], filesz) + binary.LittleEndian.PutUint64(h[40:], fileoff) + binary.LittleEndian.PutUint64(h[48:], filesz) + binary.LittleEndian.PutUint32(h[64:], uint32(len(sects))) + out := h[:] + for _, s := range sects { + out = append(out, s...) + } + return out + } + + // File layout (fixed offsets, one page apart). + const textOff = uint64(0x1000) + const entryOff = uint64(0x2000) + stubOff := entryOff + uint64(len(entry)) + dataEnd := stubOff + uint64(len(stub)) + symOff := (dataEnd + 0xF) &^ 0xF + fixOff := symOff + 0x800 + + cmds = append(cmds, lc{segment("__TEXT", base, 0, textOff+uint64(len(text)), [][]byte{ + sect("__text", "__TEXT", base+textOff, textOff, uint64(len(text))), + })}) + cmds = append(cmds, lc{segment("__DATA", base+entryOff, entryOff, dataEnd-entryOff, [][]byte{ + sect("__llgo_fie", "__DATA", base+entryOff, entryOff, uint64(len(entry))), + sect("__llgo_stub", "__DATA", base+stubOff, stubOff, uint64(len(stub))), + })}) + + // Symtab: nlist_64 entries + strtab. 
+ strtab := []byte{0} + var nlist bytes.Buffer + for _, fn := range syms { + nameOff := len(strtab) + strtab = append(strtab, "_"+fn.name...) + strtab = append(strtab, 0) + binary.Write(&nlist, binary.LittleEndian, uint32(nameOff)) + nlist.WriteByte(0x0F) // N_SECT|N_EXT + nlist.WriteByte(1) // __text + binary.Write(&nlist, binary.LittleEndian, uint16(0)) + binary.Write(&nlist, binary.LittleEndian, base+textOff+fn.size) // addr encoded via size field as offset + } + strOff := symOff + uint64(nlist.Len()) + var symtabCmd [24]byte + binary.LittleEndian.PutUint32(symtabCmd[0:], 0x2) // LC_SYMTAB + binary.LittleEndian.PutUint32(symtabCmd[4:], 24) + binary.LittleEndian.PutUint32(symtabCmd[8:], uint32(symOff)) + binary.LittleEndian.PutUint32(symtabCmd[12:], uint32(len(syms))) + binary.LittleEndian.PutUint32(symtabCmd[16:], uint32(strOff)) + binary.LittleEndian.PutUint32(symtabCmd[20:], uint32(len(strtab))) + cmds = append(cmds, lc{symtabCmd[:]}) + + // Chained fixups: header + starts(no pages) + one import (ordinal 0 + // unused, ordinal 1 -> first symbol) + names. + var fx bytes.Buffer + fxHdr := make([]byte, 28) + binary.LittleEndian.PutUint32(fxHdr[4:], 28) // starts_offset + fx.Write(fxHdr) + // starts_in_image: seg_count=2; __TEXT has no chains, __DATA gets a + // starts_in_segment whose pages are all "no chain yet" so the rewriter + // can splice its live-relocation inserts. 
+ binary.Write(&fx, binary.LittleEndian, uint32(2)) + binary.Write(&fx, binary.LittleEndian, uint32(0)) + segInfoOff := 12 // seg_count + two offsets, then this struct + binary.Write(&fx, binary.LittleEndian, uint32(segInfoOff)) + // pad to seg_info start (nothing between) + dataPages := int((dataEnd-entryOff)/0x4000 + 1) + segInfo := make([]byte, 22+2*dataPages) + binary.LittleEndian.PutUint32(segInfo[0:], uint32(len(segInfo))) + binary.LittleEndian.PutUint16(segInfo[4:], 0x4000) // page_size + binary.LittleEndian.PutUint16(segInfo[6:], 2) // DYLD_CHAINED_PTR_64 + binary.LittleEndian.PutUint64(segInfo[8:], entryOff) + binary.LittleEndian.PutUint16(segInfo[20:], uint16(dataPages)) + for i := 0; i < dataPages; i++ { + binary.LittleEndian.PutUint16(segInfo[22+2*i:], chainedPtrStartNone) + } + fx.Write(segInfo) + importsOff := fx.Len() + names := []byte{0} + addImport := func(sym string) { + no := len(names) + names = append(names, "_"+sym...) + names = append(names, 0) + binary.Write(&fx, binary.LittleEndian, uint32(no)<<9) + binary.Write(&fx, binary.LittleEndian, int32(0)) + } + addImport("missing.symbol") + if len(syms) > 0 { + addImport(syms[0].name) + } + symbolsOff := fx.Len() + fx.Write(names) + blob := fx.Bytes() + binary.LittleEndian.PutUint32(blob[8:], uint32(importsOff)) + binary.LittleEndian.PutUint32(blob[12:], uint32(symbolsOff)) + binary.LittleEndian.PutUint32(blob[16:], 2) // imports_count + binary.LittleEndian.PutUint32(blob[20:], 2) // DYLD_CHAINED_IMPORT_ADDEND + + var fixCmd [16]byte + binary.LittleEndian.PutUint32(fixCmd[0:], 0x80000034) + binary.LittleEndian.PutUint32(fixCmd[4:], 16) + binary.LittleEndian.PutUint32(fixCmd[8:], uint32(fixOff)) + binary.LittleEndian.PutUint32(fixCmd[12:], uint32(len(blob))) + cmds = append(cmds, lc{fixCmd[:]}) + + var cmdBytes []byte + for _, c := range cmds { + cmdBytes = append(cmdBytes, c.b...) 
+ } + total := fixOff + uint64(len(blob)) + raw := make([]byte, total) + binary.LittleEndian.PutUint32(raw[0:], 0xFEEDFACF) + binary.LittleEndian.PutUint32(raw[4:], 0x0100000C) // CPU_TYPE_ARM64 + binary.LittleEndian.PutUint32(raw[8:], 0) + binary.LittleEndian.PutUint32(raw[12:], 2) // MH_EXECUTE + binary.LittleEndian.PutUint32(raw[16:], uint32(len(cmds))) + binary.LittleEndian.PutUint32(raw[20:], uint32(len(cmdBytes))) + copy(raw[32:], cmdBytes) + copy(raw[textOff:], text) + copy(raw[entryOff:], entry) + copy(raw[stubOff:], stub) + copy(raw[symOff:], nlist.Bytes()) + copy(raw[strOff:], strtab) + copy(raw[fixOff:], blob) + + path := filepath.Join(t.TempDir(), "macho") + if err := os.WriteFile(path, raw, 0755); err != nil { + t.Fatal(err) + } + return path +} + +func TestLoadMachOFixture(t *testing.T) { + const base = uint64(0x100000000) + fns := []elfFn{{name: "example.com/p.A", size: 0x10}, {name: "example.com/p.B", size: 0x40}} + // Entry records: one rebase-encoded (chain bits above bit 36), one bind + // to import ordinal 1 (= example.com/p.A) with addend 4, one bind to the + // unresolved ordinal 0 that must be dropped. + rebase := (uint64(7) << 51) | (base + 0x1000 + 0x44) + bind := (uint64(1) << 63) | (uint64(4) << 24) | 1 + badBind := uint64(1) << 63 + entry := append(rec(rebase, fnv64("example.com/p.B")), rec(bind, fnv64("example.com/p.A"))...) + entry = append(entry, rec(badBind, 99)...) 
+ stub := rec(0, 0) + + path := buildMachO(t, entry, stub, fns) + info, err := load(path) + if err != nil { + t.Fatal(err) + } + if info.format != "macho" { + t.Fatalf("format %s", info.format) + } + if info.textStart != base+0x1000 || info.entryVMAddr != base+0x2000 { + t.Fatalf("layout %#x %#x", info.textStart, info.entryVMAddr) + } + if len(info.bindTargets) != 2 || info.bindTargets[0] != 0 || info.bindTargets[1] != base+0x1000+fns[0].size { + t.Fatalf("bindTargets %#v", info.bindTargets) + } + recs := parseRecords(info, info.entrySec) + if len(recs) != 2 { + t.Fatalf("records %+v", recs) + } + if recs[0].pc != base+0x1000+0x44 { + t.Fatalf("rebase pc %#x", recs[0].pc) + } + if recs[1].pc != info.bindTargets[1]+4 { + t.Fatalf("bind pc %#x", recs[1].pc) + } +} + +// machoRewriteFixture: records anchored inside real text symbols so dedupe +// keeps them, plus a meta record advertising the symbol index inside the +// entry section (readVM resolves it through the __DATA section). +func machoRewriteFixture(t *testing.T, entryPad, stubPad int) string { + t.Helper() + const base = uint64(0x100000000) + fns := []elfFn{{name: "example.com/p.A", size: 0x10}, {name: "example.com/p.B", size: 0x3F000}} // far apart: findfunctab spans many buckets + aAddr := base + 0x1000 + fns[0].size // buildMachO uses size as text offset + bAddr := base + 0x1000 + fns[1].size + idA, idB := fnv64("example.com/p.A"), fnv64("example.com/p.B") + + // Symbol index table + pointer/count globals live in the entry section + // tail so readVM can reach them via the __llgo_fie section. + entryBase := base + 0x2000 + var idx []byte + type sie struct { + id uint64 + i uint32 + } + ids := []sie{{idA, 1}, {idB, 2}} + if ids[0].id > ids[1].id { + ids[0], ids[1] = ids[1], ids[0] + } + for _, e := range ids { + var b [16]byte + binary.LittleEndian.PutUint64(b[0:], e.id) + binary.LittleEndian.PutUint32(b[8:], e.i) + idx = append(idx, b[:]...) 
+ } + // entry layout: meta(3 recs) + 2 records + [idx table][ptr][cnt] + pad + recs := append(rec(0, metaRecordMagic), rec(0, 0)...) // ptr filled below + recs = append(recs, rec(0, 0)...) + recs = append(recs, rec(aAddr+4, idA)...) + recs = append(recs, rec(bAddr+4, idB)...) + idxAddr := entryBase + uint64(len(recs)) + ptrAddr := idxAddr + uint64(len(idx)) + cntAddr := ptrAddr + 8 + var tail []byte + tail = append(tail, idx...) + var p8 [8]byte + binary.LittleEndian.PutUint64(p8[:], idxAddr) + tail = append(tail, p8[:]...) + binary.LittleEndian.PutUint64(p8[:], 2) + tail = append(tail, p8[:]...) + entry := append(recs, tail...) + // backpatch meta rows 2/3 with ptr/cnt addresses + binary.LittleEndian.PutUint64(entry[16:], ptrAddr) + binary.LittleEndian.PutUint64(entry[32:], cntAddr) + entry = append(entry, make([]byte, entryPad)...) + stub := append(rec(0, 0), make([]byte, stubPad)...) + return buildMachO(t, entry, stub, fns) +} + +func TestRewriteMachOInPlace(t *testing.T) { + path := machoRewriteFixture(t, 4096, 512) + st, err := Rewrite(path) + if err != nil { + t.Fatal(err) + } + if st.Format != "macho" || st.FtabEntries != 3 { + t.Fatalf("stats %+v", st) + } + info, err := load(path) + if err != nil { + t.Fatal(err) + } + if got := binary.LittleEndian.Uint64(info.entrySec[0:]); got != prebuiltMagic { + t.Fatalf("magic %#x", got) + } +} + +func TestRewriteMachOSpill(t *testing.T) { + path := machoRewriteFixture(t, 0, 8192) + st, err := Rewrite(path) + if err != nil { + t.Fatal(err) + } + if st.FtabEntries != 3 { + t.Fatalf("stats %+v", st) + } + info, err := load(path) + if err != nil { + t.Fatal(err) + } + if got := binary.LittleEndian.Uint64(info.entrySec[0:]); got != redirectMagic { + t.Fatalf("entry magic %#x", got) + } + if got := binary.LittleEndian.Uint64(info.stubSec[0:]); got != prebuiltMagic { + t.Fatalf("stub magic %#x", got) + } +} diff --git a/internal/pclnpost/write.go b/internal/pclnpost/write.go index 8e848d6705..def30eed2e 100644 --- 
a/internal/pclnpost/write.go +++ b/internal/pclnpost/write.go @@ -217,7 +217,10 @@ func writeBack(path string, info *binaryInfo, kept []siteRecord) (ftabCount, buc if err := os.WriteFile(path, raw, st.Mode()); err != nil { return 0, 0, err } - if info.format == "macho" && runtime.GOOS == "darwin" { + // Only re-sign binaries that were signed to begin with (lld ad-hoc + // signs real executables; unsigned inputs need no signature and + // codesign would reject them anyway). + if info.format == "macho" && info.hasCodeSignature && runtime.GOOS == "darwin" { if out, err := exec.Command("codesign", "-f", "-s", "-", path).CombinedOutput(); err != nil { return 0, 0, fmt.Errorf("codesign: %v: %s", err, out) } From 861f443939146b5df5b23a4ef75b86d1e6433107 Mon Sep 17 00:00:00 2001 From: Li Jie Date: Fri, 3 Jul 2026 21:51:01 +0800 Subject: [PATCH 55/59] runtime,ssa,cl: LLGo-owned frame-pointer unwinder (Stage 5) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Every Go function on supported targets keeps the frame-pointer chain ("frame-pointer"="non-leaf", gated by Program.NeedsFramePointer to linux/darwin — on embedded targets the unwinder does not exist and the layout change perturbed the conservative GC on ESP32-C3). runtime.Caller, Callers, CallersFrames, Stack and the unrecovered-panic dump walk [fp]/[fp+w] directly and symbolize through the prebuilt ftab and pcline tables: - Return addresses resolve at pc-1 (Go's convention); statement labels can land exactly on a return address, so raw-pc nearest-below reported the following line. The convention holds with or without the prebuilt table (text bounds fall back to the first-use frame table — link-phase overflow layouts otherwise silently disabled it, the root cause of the amd64 CI failures). - The walk is bounded to the program's own text: libc frames without FP discipline decode as wild pcs that nearest-below would attribute to arbitrary functions. 
- Methods and anonymous functions are now trackable (methods had no pcline labels; closures lost their innermost frame to tail-call optimization), and mid-function aligned pcs merge statement records instead of returning declaration lines. - frameSymbol results are memoized per pc (deep re-walks paid a dladdr per frame: 32-frame walks 8µs -> 180ns) and the pcline table is built during the startup pre-warm (lazily building it inside the first Caller cost ~200µs at scale). - Shadow-stack instrumentation is no longer emitted; LLGO_SHADOW_STACK=1 keeps the legacy emitters for one release. Tracked functions retain noinline, no-tail-call and the data-only pcline records. - libunwind is gone: the clite stacktrace fallback walks the FP chain with dladdr names (same output format), and linux binaries no longer link -lunwind. Semantics are gc ground truth, verified against go: physical stacks show every real frame; interface-chain Caller marks land at skip 3 and closure chains at skip 4 (the old expectations encoded shadow-stack frame loss). Perf (best-of, mac/linux): hot.Caller0 17/37ns (Go 155/241), deep.Direct512 2.8µs (Go 9.7µs; was 87-95µs), bigfunc.Work 18µs (Go 30µs; was 433µs), binary size unchanged or smaller. 
Co-Authored-By: Claude Fable 5 --- cl/instr.go | 23 +++- internal/build/funcinfo_table.go | 10 ++ runtime/internal/clite/debug/_wrap/debug.c | 54 +++++---- .../internal/clite/debug/libunwind_linux.go | 7 -- .../clite/debug/{libunwind.go => link.go} | 2 - runtime/internal/lib/runtime/_wrap/runtime.c | 9 ++ runtime/internal/lib/runtime/extern.go | 45 +++++--- .../lib/runtime/pprof_runtime_stub_llgo.go | 8 ++ runtime/internal/lib/runtime/symtab.go | 105 +++++++++++++++++- runtime/internal/lib/runtime/unwind_llgo.go | 86 ++++++++++++++ ssa/decl.go | 8 ++ ssa/funcinfo.go | 17 +++ ssa/package.go | 3 + 13 files changed, 324 insertions(+), 53 deletions(-) delete mode 100644 runtime/internal/clite/debug/libunwind_linux.go rename runtime/internal/clite/debug/{libunwind.go => link.go} (71%) create mode 100644 runtime/internal/lib/runtime/unwind_llgo.go diff --git a/cl/instr.go b/cl/instr.go index 8530ff0ee5..c40dc8fa41 100644 --- a/cl/instr.go +++ b/cl/instr.go @@ -950,7 +950,10 @@ func collectRuntimeCallerFunctions(pkg *ssa.Package) (funcs, trackable map[*ssa. } funcs[fn] = true for _, anon := range fn.AnonFuncs { - add(anon, false) + // Anonymous functions inherit trackability: one that reaches + // runtime.Caller needs its own frame (noinline + no tail calls) + // or physical unwinding loses it. + add(anon, track) } return true } @@ -966,7 +969,10 @@ func collectRuntimeCallerFunctions(pkg *ssa.Package) (funcs, trackable map[*ssa. } methods := pkg.Prog.MethodSets.MethodSet(typ) for i := 0; i < methods.Len(); i++ { - add(pkg.Prog.MethodValue(methods.At(i)), false) + // Methods are as trackable as package-level functions: one + // that (transitively) calls runtime.Caller needs frames and + // pcline labels of its own. 
+ add(pkg.Prog.MethodValue(methods.At(i)), true) } } } @@ -1337,7 +1343,18 @@ func (p *context) runtimeCallerFrameName() string { return "" } +// emitShadowStackInstrumentation gates the legacy shadow-stack calls +// (PushCallerLocationFrame / RecordCallerLocation / RecordPanicLocation). +// The FP-chain unwinder supersedes them: physical pcs resolve through the +// prebuilt ftab and pcline labels, so tracked functions keep only noinline, +// no-tail-call and the label records. The emitters stay for one release as +// an escape hatch (LLGO_SHADOW_STACK=1). +var emitShadowStackInstrumentation = os.Getenv("LLGO_SHADOW_STACK") == "1" + func (p *context) pushCallerLocationFrame(b llssa.Builder, fn *ssa.Function) { + if !emitShadowStackInstrumentation { + return + } if fn == nil { return } @@ -1361,7 +1378,7 @@ func (p *context) recordPanicLocation(b llssa.Builder, pos token.Pos) { } func (p *context) recordRuntimeLocation(b llssa.Builder, pos token.Pos, fn string) { - if !p.shouldTrackCallerFrames() { + if !emitShadowStackInstrumentation || !p.shouldTrackCallerFrames() { return } position := p.fset.Position(pos) diff --git a/internal/build/funcinfo_table.go b/internal/build/funcinfo_table.go index ea57e7a865..c69a4cc76d 100644 --- a/internal/build/funcinfo_table.go +++ b/internal/build/funcinfo_table.go @@ -46,6 +46,7 @@ const ( pcLineCountSymbol = "__llgo_pcline_count" pcSiteStartPtrSymbol = "__llgo_pcsite_start" pcSiteEndPtrSymbol = "__llgo_pcsite_end" + fpChainSymbol = "__llgo_fp_chain" funcInfoDataSymbol = "__llgo_funcinfo_table$data" pcLineDataSymbol = "__llgo_pcline_table$data" funcInfoStringsDataSymbol = "__llgo_funcinfo_strings$data" @@ -349,6 +350,15 @@ func emitFuncInfoTable(ctx *context, pkg llssa.Package, records []funcInfoRecord stubCount := llvm.AddGlobal(mod, countType, funcInfoStubCountSymbol) pcLineCount := llvm.AddGlobal(mod, countType, pcLineCountSymbol) hashMask := llvm.AddGlobal(mod, countType, funcInfoHashMaskSymbol) + // One byte per binary 
telling the runtime whether Go functions were + // compiled with the frame-pointer attribute — the pairing signal for + // the physical unwinder (runtime fpUnwindAvailable). + fpChain := llvm.AddGlobal(mod, i8Type, fpChainSymbol) + fpChainVal := uint64(0) + if ctx.prog.NeedsFramePointer() { + fpChainVal = 1 + } + fpChain.SetInitializer(llvm.ConstInt(i8Type, fpChainVal, false)) if len(records) == 0 && len(pcLines) == 0 { tablePtr.SetInitializer(llvm.ConstPointerNull(tablePtr.GlobalValueType())) pcLinePtr.SetInitializer(llvm.ConstPointerNull(pcLinePtr.GlobalValueType())) diff --git a/runtime/internal/clite/debug/_wrap/debug.c b/runtime/internal/clite/debug/_wrap/debug.c index a03fb3ca1c..5ab5c5c335 100644 --- a/runtime/internal/clite/debug/_wrap/debug.c +++ b/runtime/internal/clite/debug/_wrap/debug.c @@ -1,5 +1,4 @@ #if defined(__linux__) -#define UNW_LOCAL_ONLY #ifndef _GNU_SOURCE #define _GNU_SOURCE #endif @@ -8,7 +7,7 @@ #include #include -#include +#include void *llgo_address() { return __builtin_return_address(0); @@ -29,31 +28,40 @@ void *llgo_symbol(char *name) { } void llgo_stacktrace(int skip, void *ctx, int (*fn)(void *ctx, void *pc, void *offset, void *sp, char *name)) { + /* Frame-pointer chain walk. LLGo compiles every Go function with + * "frame-pointer"="non-leaf", so [fp] is the previous frame pointer and + * [fp+1] the return address on both arm64 and x86-64. This replaces the + * libunwind cursor: no unwind tables, no -lunwind, and it keeps working + * through any frame that maintains the chain (C code compiled with + * frame pointers included). The walk stops at the first frame that + * breaks chain discipline. + * + * The Go-side walker (runtime/internal/lib/runtime/unwind_llgo.go + * fpCallers) implements the same discipline plus a text-range bound the + * frame tables provide; keep the chain guards below in sync with it. 
*/ int saved_errno = errno; - unw_cursor_t cursor; - unw_context_t context; - unw_word_t offset, pc, sp; - char fname[256]; - unw_getcontext(&context); - unw_init_local(&cursor, &context); + uintptr_t fp = (uintptr_t)__builtin_frame_address(0); int depth = 0; - while (unw_step(&cursor) > 0) { - if (depth < skip) { - depth++; - continue; - } - if (unw_get_reg(&cursor, UNW_REG_IP, &pc) == 0) { - fname[0] = 0; - offset = 0; - if (unw_get_proc_name(&cursor, fname, sizeof(fname), &offset) == 0) { - fname[sizeof(fname) - 1] = 0; - } - unw_get_reg(&cursor, UNW_REG_SP, &sp); - if (fn(ctx, (void*)pc, (void*)offset, (void*)sp, fname) == 0) { - errno = saved_errno; - return; + while (fp) { + uintptr_t prev = *(uintptr_t *)fp; + uintptr_t pc = *((uintptr_t *)fp + 1); + if (pc < 4096) + break; + if (depth >= skip) { + Dl_info info; + const char *name = ""; + uintptr_t offset = 0; + if (dladdr((void *)pc, &info) && info.dli_sname) { + name = info.dli_sname; + offset = pc - (uintptr_t)info.dli_saddr; } + if (fn(ctx, (void *)pc, (void *)offset, (void *)fp, (char *)name) == 0) + break; } + depth++; + if (prev <= fp || prev - fp > (uintptr_t)1 << 20 || (prev & (sizeof(uintptr_t) - 1))) + break; + fp = prev; } errno = saved_errno; } diff --git a/runtime/internal/clite/debug/libunwind_linux.go b/runtime/internal/clite/debug/libunwind_linux.go deleted file mode 100644 index 1fc8a380da..0000000000 --- a/runtime/internal/clite/debug/libunwind_linux.go +++ /dev/null @@ -1,7 +0,0 @@ -//go:build linux && !baremetal - -package debug - -const ( - LLGoPackage = "link: -lunwind" -) diff --git a/runtime/internal/clite/debug/libunwind.go b/runtime/internal/clite/debug/link.go similarity index 71% rename from runtime/internal/clite/debug/libunwind.go rename to runtime/internal/clite/debug/link.go index f9eaed9507..b8c6e0edcb 100644 --- a/runtime/internal/clite/debug/libunwind.go +++ b/runtime/internal/clite/debug/link.go @@ -1,5 +1,3 @@ -//go:build !linux - package debug const ( diff --git 
a/runtime/internal/lib/runtime/_wrap/runtime.c b/runtime/internal/lib/runtime/_wrap/runtime.c index 4dc23cfd53..cd5af9a95e 100644 --- a/runtime/internal/lib/runtime/_wrap/runtime.c +++ b/runtime/internal/lib/runtime/_wrap/runtime.c @@ -8,3 +8,12 @@ int llgo_maxprocs() return 1; #endif } + +__attribute__((noinline)) void *llgo_framepointer(void) +{ +#if defined(__GNUC__) || defined(__clang__) + return __builtin_frame_address(0); +#else + return 0; +#endif +} diff --git a/runtime/internal/lib/runtime/extern.go b/runtime/internal/lib/runtime/extern.go index 66f95de36f..5ff8cb5a3b 100644 --- a/runtime/internal/lib/runtime/extern.go +++ b/runtime/internal/lib/runtime/extern.go @@ -9,16 +9,31 @@ import ( rtdebug "github.com/goplus/llgo/runtime/internal/runtime" ) +// callerLocation substitutes gc's placeholders for missing position info: +// "???" for an unknown file and line 1 for an unknown line. +func callerLocation(file string, line int) (string, int) { + if file == "" { + file = "???" + } + if line == 0 { + line = 1 + } + return file, line +} + +//go:noinline func Caller(skip int) (pc uintptr, file string, line int, ok bool) { - if frame, ok := rtdebug.Caller(skip); ok { - file = frame.File - line = frame.Line - if file == "" { - file = "???" - } - if line == 0 { - line = 1 + if fpUnwindAvailable() { + var pcs [1]uintptr + if fpCallers(skip+1, pcs[:]) >= 1 { + // pcs hold return addresses; attribute to the call instruction. + sym := frameSymbol(pcs[0] - 1) + file, line = callerLocation(sym.file, sym.line) + return pcs[0], file, line, true } + } + if frame, ok := rtdebug.Caller(skip); ok { + file, line = callerLocation(frame.File, frame.Line) return frame.PC, file, line, true } var pcs [1]uintptr @@ -26,17 +41,17 @@ func Caller(skip int) (pc uintptr, file string, line int, ok bool) { return 0, "", 0, false } sym := frameSymbol(pcs[0]) - file, line = sym.file, sym.line - if file == "" { - file = "???" 
- } - if line == 0 { - line = 1 - } + file, line = callerLocation(sym.file, sym.line) return pcs[0], file, line, true } +//go:noinline func Callers(skip int, pc []uintptr) int { + if fpUnwindAvailable() { + if n := fpCallers(skip, pc); n > 0 { + return n + } + } if n := rtdebug.Callers(skip, pc); n > 0 { return n } diff --git a/runtime/internal/lib/runtime/pprof_runtime_stub_llgo.go b/runtime/internal/lib/runtime/pprof_runtime_stub_llgo.go index e32a36160b..0e145f8abb 100644 --- a/runtime/internal/lib/runtime/pprof_runtime_stub_llgo.go +++ b/runtime/internal/lib/runtime/pprof_runtime_stub_llgo.go @@ -127,6 +127,11 @@ func funcForPCSlow(pc uintptr) *Func { return fn } if sym, ok := pcSymbolForFuncInfoIndex(pc, pc, prebuiltFrame(idx).funcIndex); ok { + // amd64 entries are byte-dense: a ret-1 style query can + // coincide with another symbol's entry; statement records + // win via the shared refinement rule (entry queries are + // unaffected — sites never precede their function's entry). + sym = refinePCSymbolLine(sym, pc) fn := newFuncForPC(pc, sym) prebuiltFuncCacheStore(idx, unsafe.Pointer(fn)) cacheFuncForPC(pc, fn) @@ -180,6 +185,9 @@ func funcForPCSlow(pc uintptr) *Func { } } if sym, ok := funcPCFrameForPC(pc); ok { + // Mid-function pcs deserve statement lines, not the declaration + // line (amd64 return addresses can be 4-aligned and land here). 
+ sym = refinePCSymbolLine(sym, pc) fn := newFuncForPC(pc, sym) cacheFuncForPC(pc, fn) return fn diff --git a/runtime/internal/lib/runtime/symtab.go b/runtime/internal/lib/runtime/symtab.go index 43fee60286..e229577144 100644 --- a/runtime/internal/lib/runtime/symtab.go +++ b/runtime/internal/lib/runtime/symtab.go @@ -124,7 +124,11 @@ type frameSymbolCacheEntry struct { name string } -const frameSymbolCacheSize = 128 +// Sized for whole-stack re-walks: CallersFrames over a deep stack touches +// one entry per distinct return pc, and a 128-entry table thrashed on +// 32-frame stacks (adjacent return pcs share high bits, and repeated walks +// paid a pcline search per frame per walk). +const frameSymbolCacheSize = 4096 var frameSymbolCache [frameSymbolCacheSize]frameSymbolCacheEntry @@ -1464,6 +1468,10 @@ func init() { lastStr := funcInfoCString(uint16(runtimeFuncInfoStringCount - 1)) touch(unsafe.Pointer(runtimeFuncInfoStrings), last+uintptr(cStringLen(lastStr))+1) } + // The pcline table is on the Caller/CallersFrames path; building it + // here keeps the first user lookup at steady-state cost (the build cost + // scales with call-site count and showed up as a 200µs first Caller). + initRuntimePCLineFrames() // One synthetic lookup warms the code paths themselves (allocator size // classes, lookup caches), not just the data pages. if prebuiltFrameCount() > 0 { @@ -1582,6 +1590,24 @@ func pcSymbolForFuncInfoIndex(pc, entry uintptr, funcIndex uint32) (pcSymbol, bo }, true } +// refinePCSymbolLine upgrades a function-record symbol to statement +// granularity when a same-function pcline record covers pc, or the call +// instruction at pc-1 for return addresses (statement labels can sit +// exactly on a return address). Cross-function records are rejected by +// pcLineFrameForPC's entry check, so exact-entry queries keep their +// declaration line. This is the single place the pc/pc-1 statement rule +// lives; FuncForPC and CallersFrames must agree on it. 
+func refinePCSymbolLine(sym pcSymbol, pc uintptr) pcSymbol { + if lineSym, ok := pcLineFrameForPC(pc, sym.entry); ok { + return mergePCLineSymbol(sym, lineSym) + } + if lineSym, ok := pcLineFrameForPC(pc-1, sym.entry); ok { + lineSym.pc = pc + return mergePCLineSymbol(sym, lineSym) + } + return sym +} + func initRuntimePCLineFrames() { if latomic.LoadUint32(&runtimePCLineInitState) == runtimeFuncInfoInitDone { return @@ -1968,8 +1994,61 @@ func mergePCLineSymbol(base, line pcSymbol) pcSymbol { return line } +// prebuiltTextContains reports whether pc falls inside the text range the +// prebuilt table covers (first entry .. end-of-text sentinel). When the +// link-phase rewrite was skipped (e.g. blob overflow) the first-use frame +// table provides the bounds instead — the pc-1 return-address convention +// and the walk bound must not silently turn off with the fast table, or +// frames get attributed to the next statement (a return address equals the +// following anchor exactly). +func prebuiltTextContains(pc uintptr) bool { + if n := len(runtimePrebuiltFtab); n > 0 { + return pc >= runtimePrebuiltBase && pc-runtimePrebuiltBase < uintptr(runtimePrebuiltFtab[n-1].entryOff) + } + if frames := runtimeFuncPCFrames; len(frames) > 0 { + const lastFuncSlack = 1 << 20 + return pc >= frames[0].entry && pc < frames[len(frames)-1].entry+lastFuncSlack + } + return false +} + +// frameSymbolResultCache memoizes full symbolization results per pc. Deep +// CallersFrames walks re-symbolize the same return addresses on every walk, +// and a miss below costs a dladdr; entries are immutable heap nodes so the +// benign-racy word-sized store never tears. +type frameSymbolResult struct { + pc uintptr + sym pcSymbol +} + +const frameSymbolResultCacheSize = 4096 + +var frameSymbolResultCache [frameSymbolResultCacheSize]*frameSymbolResult + +// minLegalPC: nothing below the zero page can be code. 
Values under it are +// null-ish slots or shadow-stack synthetic markers, never return addresses. +const minLegalPC = 4096 + func frameSymbol(pc uintptr) pcSymbol { - if pc&3 != 0 { + if pc > minLegalPC { + i := (pc >> 2) & uintptr(len(frameSymbolResultCache)-1) + if e := frameSymbolResultCache[i]; e != nil && e.pc == pc { + return e.sym + } + sym := frameSymbolUncached(pc) + frameSymbolResultCache[i] = &frameSymbolResult{pc: pc, sym: sym} + return sym + } + return frameSymbolUncached(pc) +} + +func frameSymbolUncached(pc uintptr) pcSymbol { + if pc&3 != 0 && !prebuiltTextContains(pc+1) { + // Unaligned pcs outside the text range are shadow-stack synthetic + // markers. Text-range pcs — return addresses minus one, and on + // amd64 any instruction pc — flow through the normal lookups: + // pcline nearest-below is byte-exact, no alignment games (rounding + // by instruction size was an arm64-only assumption). if frame, ok := rtdebug.FrameForPC(pc); ok { return pcSymbol{ pc: pc, @@ -2004,6 +2083,15 @@ func frameSymbol(pc uintptr) pcSymbol { lineSym.pc = pc return lineSym } + // Resolve through the prebuilt table before touching the dynamic + // loader: frames in packages without pcline records (the runtime + // itself) otherwise cost a dladdr each on the first full-stack walk. + if runtimeFuncPCFramesBuilt() { + if funcSym, ok := funcPCFrameForPC(pc); ok { + funcSym.pc = pc + return refinePCSymbolLine(funcSym, pc) + } + } sym := addrInfoSymbol(pc) if lineSym, ok := pcLineFrameForPC(pc, sym.entry); ok { return mergePCLineSymbol(sym, lineSym) @@ -2051,7 +2139,18 @@ func (ci *Frames) Next() (frame Frame, more bool) { } else { pc, ci.callers = ci.callers[0], ci.callers[1:] } - sym := frameSymbol(pc) + // Physical pcs are return addresses; attribute them to the call + // instruction (Go's pc-1 convention). 
Statement labels can land + // exactly on a return address — the next statement's marker sits + // right after the call — so a raw-pc nearest-below lookup would + // report the following line. Synthetic shadow-stack pcs live + // outside the text range and keep raw-pc semantics. + lookupPC := pc + if prebuiltTextContains(pc) { + lookupPC = pc - 1 + } + sym := frameSymbol(lookupPC) + sym.pc = pc if !sym.ok { ci.frames = append(ci.frames, Frame{ PC: pc, diff --git a/runtime/internal/lib/runtime/unwind_llgo.go b/runtime/internal/lib/runtime/unwind_llgo.go new file mode 100644 index 0000000000..9e4732f3b7 --- /dev/null +++ b/runtime/internal/lib/runtime/unwind_llgo.go @@ -0,0 +1,86 @@ +//go:build !baremetal && !wasm + +package runtime + +import "unsafe" + +//go:linkname c_framepointer C.llgo_framepointer +func c_framepointer() unsafe.Pointer + +// maxFPStride bounds how far up the stack one frame may sit from the next. +// A slot whose decoded parent is further away than any plausible frame is a +// corrupt chain, not a giant frame; stop rather than walk off the stack. +const maxFPStride = 1 << 20 + +// fpCallers walks the frame-pointer chain and fills pc with return +// addresses, Go-style: pc[0] is the return address in the frame `skip` +// levels above the caller of fpCallers. Every LLGo-compiled function keeps +// x29/rbp chained ("frame-pointer"="non-leaf" is set on all Go functions), +// so unlike the shadow stack this sees every physical frame; the walk stops +// at the first frame that breaks the chain discipline (e.g. foreign C code +// compiled without frame pointers). +// +// The clite walker (runtime/internal/clite/debug/_wrap/debug.c +// llgo_stacktrace) implements the same chain discipline and guards for the +// pre-table paths (unrecovered-panic dump, last-resort Callers fallback); +// keep the two in sync when changing the walk rules. 
+// +//go:noinline +func fpCallers(skip int, pc []uintptr) int { + if len(pc) == 0 { + return 0 + } + // The walk bound needs the frame table's text range; make sure it is + // built (no-op when the prebuilt table was adopted at startup). + initRuntimeFuncPCFrames() + fp := uintptr(c_framepointer()) + n := 0 + // The helper's saved chain starts at our own frame; skip fpCallers + // itself so skip counting matches the caller's view. + skip++ + const maxFrames = 4096 + for i := 0; fp != 0 && n < len(pc) && i < maxFrames; i++ { + prev := *(*uintptr)(unsafe.Pointer(fp)) + ret := *(*uintptr)(unsafe.Pointer(fp + unsafe.Sizeof(uintptr(0)))) + if ret < minLegalPC { + break + } + // Beyond main the chain runs into libc frames without FP + // discipline; their slots decode as wild pcs that nearest-below + // symbolization would map to arbitrary functions. Bound the walk + // to the program's own text (Go tracebacks stop at runtime.main + // for the same reason). + if !prebuiltTextContains(ret) { + break + } + if skip > 0 { + skip-- + } else { + pc[n] = ret + n++ + } + // Stacks grow down, so the chain must strictly increase; bound the + // stride so a corrupt slot cannot walk off the stack. + if prev <= fp || prev-fp > maxFPStride || prev&(unsafe.Sizeof(uintptr(0))-1) != 0 { + break + } + fp = prev + } + return n +} + +// runtimeFPChain is emitted next to the funcinfo table (one per binary, +// internal/build emitFuncInfoTable) and records whether this binary's Go +// functions were compiled with the frame-pointer attribute +// (ssa.Program.NeedsFramePointer). +// +//go:linkname runtimeFPChain __llgo_fp_chain +var runtimeFPChain uint8 + +// fpUnwindAvailable reports whether the physical walk can be used for the +// public stack APIs: the compiler declared the FP chain intact for this +// binary, and the funcinfo tables are present (without them symbolization +// would fall back to dladdr anyway). 
+func fpUnwindAvailable() bool { + return runtimeFPChain != 0 && runtimeFuncInfoTable != nil && runtimeFuncInfoCount > 0 +} diff --git a/ssa/decl.go b/ssa/decl.go index 3e17b40e9f..82ba5026a7 100644 --- a/ssa/decl.go +++ b/ssa/decl.go @@ -258,6 +258,14 @@ func (p Package) NewFuncEx(name string, sig *types.Signature, bg Background, has fn := llvm.AddFunction(p.mod, name, t.ll) if bg == InGo { fn.AddFunctionAttr(p.nullPointerIsValidAttr) + // Keep frame pointers so the runtime can walk real stacks (FP chain) + // for Callers/panic tracebacks instead of shadow-stack bookkeeping. + // Only where that unwinder exists: on embedded targets the attribute + // is pure cost, and the changed stack layout can retain stale slots + // under the conservative GC (observed on ESP32-C3). + if p.Prog.NeedsFramePointer() { + fn.AddFunctionAttr(p.framePointerAttr) + } } if instantiated { fn.SetLinkage(llvm.LinkOnceAnyLinkage) diff --git a/ssa/funcinfo.go b/ssa/funcinfo.go index 7e3979a9be..25e792b6c3 100644 --- a/ssa/funcinfo.go +++ b/ssa/funcinfo.go @@ -105,3 +105,20 @@ func (p Package) EmitPCLineInfo(id uint64, symbol, file string, line, column int }), ) } + +// NeedsFramePointer reports whether functions should keep the frame-pointer +// chain: only on OS targets where the runtime's FP unwinder runs. 
+func (p Program) NeedsFramePointer() bool { + target := p.Target() + if target == nil { + return true + } + if target.Target != "" || target.GOARCH == "wasm" { + return false + } + switch target.GOOS { + case "linux", "darwin", "": + return true + } + return false +} diff --git a/ssa/package.go b/ssa/package.go index 9826102218..563b68fde5 100644 --- a/ssa/package.go +++ b/ssa/package.go @@ -466,11 +466,13 @@ func (p Program) NewPackage(name, pkgPath string) Package { strs := make(map[string]llvm.Value) glbDbgVars := make(map[Expr]bool) nullPointerIsValidAttr := mod.Context().CreateEnumAttribute(llvm.AttributeKindID("null_pointer_is_valid"), 0) + framePointerAttr := mod.Context().CreateStringAttribute("frame-pointer", "non-leaf") // Don't need reset p.needPyInit here // p.needPyInit = false ret := &aPackage{ mod: mod, path: pkgPath, Prog: p, vars: gbls, fns: fns, nullPointerIsValidAttr: nullPointerIsValidAttr, + framePointerAttr: framePointerAttr, pyobjs: pyobjs, pymods: pymods, strs: strs, di: nil, cu: nil, glbDbgVars: glbDbgVars, export: make(map[string]string), @@ -727,6 +729,7 @@ type aPackage struct { Prog Program nullPointerIsValidAttr llvm.Attribute + framePointerAttr llvm.Attribute di diBuilder cu CompilationUnit From 699a3dd7c406cd767fdad5f51c41ba8cba3eec50 Mon Sep 17 00:00:00 2001 From: Li Jie Date: Fri, 3 Jul 2026 21:51:01 +0800 Subject: [PATCH 56/59] test: goldens and probes for the FP unwinder IR goldens gain the frame-pointer attribute (out.ll files carry no attribute groups and needed no regeneration); the legacy shadow-stack emitter assertions opt into LLGO_SHADOW_STACK; statement-line probes move to gc ground-truth skip counts; NeedsFramePointer target matrix and pclnpost symbolAddr/decodePtr edges covered. 
Co-Authored-By: Claude Fable 5 --- cl/_testgo/sigsegv/in.go | 2 +- cl/caller_frame_test.go | 9 +++++++ cl/compile_test.go | 6 ++--- internal/build/funcinfo_table_test.go | 21 ++++++++++++++++ internal/pclnpost/logic_test.go | 33 +++++++++++++++++++++++++ ssa/needsfp_test.go | 26 ++++++++++++++++++++ ssa/ssa_test.go | 34 +++++++++++++------------- test/go/runtime_statement_line_test.go | 8 ++++-- 8 files changed, 116 insertions(+), 23 deletions(-) create mode 100644 ssa/needsfp_test.go diff --git a/cl/_testgo/sigsegv/in.go b/cl/_testgo/sigsegv/in.go index f9e071e3da..f7dd48bada 100644 --- a/cl/_testgo/sigsegv/in.go +++ b/cl/_testgo/sigsegv/in.go @@ -27,4 +27,4 @@ func main() { println("main") } -// CHECK: attributes #0 = { null_pointer_is_valid } +// CHECK: attributes #0 = { null_pointer_is_valid "frame-pointer"="non-leaf" } diff --git a/cl/caller_frame_test.go b/cl/caller_frame_test.go index 69d570d9ee..2fb4800456 100644 --- a/cl/caller_frame_test.go +++ b/cl/caller_frame_test.go @@ -481,6 +481,9 @@ func TestRuntimeFrameNameNormalization(t *testing.T) { } func TestCompileRuntimeCallerFrameInstrumentation(t *testing.T) { + old := emitShadowStackInstrumentation + emitShadowStackInstrumentation = true + defer func() { emitShadowStackInstrumentation = old }() ssapkg, files := buildCallerFrameSSAPackage(t, "example.com/foo", `package foo import "runtime/debug" @@ -719,6 +722,9 @@ func top() { } func TestCompileRuntimeCallerFrameUsesGoNameForLinkname(t *testing.T) { + old := emitShadowStackInstrumentation + emitShadowStackInstrumentation = true + defer func() { emitShadowStackInstrumentation = old }() ssapkg, files := buildCallerFrameSSAPackage(t, "command-line-arguments", `package main import "runtime" @@ -797,6 +803,9 @@ func f() { _ = runtime.FuncForPC(0) } } func TestCompileRuntimeCallerLocationOnlyForRuntimePaths(t *testing.T) { + old := emitShadowStackInstrumentation + emitShadowStackInstrumentation = true + defer func() { emitShadowStackInstrumentation = old }() 
ssapkg, files := buildCallerFrameSSAPackage(t, "example.com/foo", `package foo import "runtime" diff --git a/cl/compile_test.go b/cl/compile_test.go index 5eccc11707..9a5f731c11 100644 --- a/cl/compile_test.go +++ b/cl/compile_test.go @@ -385,7 +385,7 @@ _llgo_2: ; preds = %_llgo_1, %_llgo_0 ret void } -attributes #0 = { null_pointer_is_valid } +attributes #0 = { null_pointer_is_valid "frame-pointer"="non-leaf" } `) } @@ -420,7 +420,7 @@ _llgo_2: ; preds = %_llgo_1, %_llgo_0 ret void } -attributes #0 = { null_pointer_is_valid } +attributes #0 = { null_pointer_is_valid "frame-pointer"="non-leaf" } `) } @@ -461,6 +461,6 @@ _llgo_0: ret i8 %1 } -attributes #0 = { null_pointer_is_valid } +attributes #0 = { null_pointer_is_valid "frame-pointer"="non-leaf" } `) } diff --git a/internal/build/funcinfo_table_test.go b/internal/build/funcinfo_table_test.go index f5f11762cf..e7a58751d3 100644 --- a/internal/build/funcinfo_table_test.go +++ b/internal/build/funcinfo_table_test.go @@ -596,9 +596,30 @@ func TestFuncInfoTableEmptyEncodedInitializers(t *testing.T) { "@__llgo_funcinfo_table = global ptr null", "@__llgo_pcline_table = global ptr null", "@__llgo_funcinfo_count = global i64 0", + "@__llgo_fp_chain = global i8 1", } { if !strings.Contains(ir, want) { t.Fatalf("missing %q in:\n%s", want, ir) } } } + +// Targets without the frame-pointer attribute must declare the chain +// broken so the runtime never attempts a physical walk there. 
+func TestFuncInfoTableFPChainOff(t *testing.T) { + prog := llssa.NewProgram(&llssa.Target{GOOS: "windows", GOARCH: "amd64"}) + prog.EnableFuncInfoMetadata(true) + src := prog.NewPackage("example.com/p", "example.com/p") + ctx := &context{ + prog: prog, + buildConf: &Config{ + BuildMode: BuildModeExe, + Goos: "windows", + Goarch: "amd64", + }, + } + emitFuncInfoTable(ctx, src, nil, nil, nil) + if ir := src.String(); !strings.Contains(ir, "@__llgo_fp_chain = global i8 0") { + t.Fatalf("missing fp_chain=0 in:\n%s", ir) + } +} diff --git a/internal/pclnpost/logic_test.go b/internal/pclnpost/logic_test.go index a37f7932b5..16b68da0e5 100644 --- a/internal/pclnpost/logic_test.go +++ b/internal/pclnpost/logic_test.go @@ -114,3 +114,36 @@ func TestFnv64NonZero(t *testing.T) { t.Fatal("fnv sanity") } } + +func TestSymbolAddrBothFormats(t *testing.T) { + elfPath := buildELF(t, fixtureFns(), fixtureEntry, fixtureStub, 4096, 256) + if addr, err := symbolAddr(elfPath, "example.com/p.A"); err != nil || addr != 0x10000 { + t.Fatalf("elf symbolAddr = %#x, %v", addr, err) + } + if _, err := symbolAddr(elfPath, "no.such.symbol"); err == nil { + t.Fatal("expected missing-symbol error on elf") + } + machoPath := buildMachO(t, rec(0, 0), rec(0, 0), + []elfFn{{name: "example.com/p.M", size: 0x10}}) + if addr, err := symbolAddr(machoPath, "example.com/p.M"); err != nil || addr == 0 { + t.Fatalf("macho symbolAddr = %#x, %v", addr, err) + } + if _, err := symbolAddr(machoPath, "no.such.symbol"); err == nil { + t.Fatal("expected missing-symbol error on macho") + } +} + +func TestDecodePtrVal(t *testing.T) { + elf := &binaryInfo{format: "elf"} + if got := decodePtrVal(elf, 0x1234); got != 0x1234 { + t.Fatalf("elf passthrough %#x", got) + } + macho := &binaryInfo{format: "macho", imageBase: 0x100000000} + chained := (uint64(5) << 51) | 0x100000abc + if got := decodePtrVal(macho, chained); got != 0x100000abc { + t.Fatalf("chained decode %#x", got) + } + if got := decodePtrVal(macho, 
0x100000abc); got != 0x100000abc { + t.Fatalf("plain macho %#x", got) + } +} diff --git a/ssa/needsfp_test.go b/ssa/needsfp_test.go new file mode 100644 index 0000000000..21b8100cfa --- /dev/null +++ b/ssa/needsfp_test.go @@ -0,0 +1,26 @@ +//go:build !llgo +// +build !llgo + +package ssa + +import "testing" + +func TestNeedsFramePointer(t *testing.T) { + cases := []struct { + target *Target + want bool + }{ + {nil, true}, + {&Target{GOOS: "linux", GOARCH: "amd64"}, true}, + {&Target{GOOS: "darwin", GOARCH: "arm64"}, true}, + {&Target{GOOS: "linux", GOARCH: "wasm"}, false}, + {&Target{GOOS: "linux", GOARCH: "riscv32", Target: "esp32c3"}, false}, + {&Target{GOOS: "windows", GOARCH: "amd64"}, false}, + } + for _, c := range cases { + p := NewProgram(c.target) + if got := p.NeedsFramePointer(); got != c.want { + t.Fatalf("NeedsFramePointer(%+v) = %v, want %v", c.target, got, c.want) + } + } +} diff --git a/ssa/ssa_test.go b/ssa/ssa_test.go index 38160c8696..306e004fa4 100644 --- a/ssa/ssa_test.go +++ b/ssa/ssa_test.go @@ -987,7 +987,7 @@ _llgo_0: ret i64 %2 } -attributes #0 = { null_pointer_is_valid } +attributes #0 = { null_pointer_is_valid "frame-pointer"="non-leaf" } `) } @@ -1053,7 +1053,7 @@ _llgo_0: ; Function Attrs: null_pointer_is_valid declare ptr @"github.com/goplus/llgo/runtime/internal/runtime.AllocU"(i64) #0 -attributes #0 = { null_pointer_is_valid } +attributes #0 = { null_pointer_is_valid "frame-pointer"="non-leaf" } `, wrapRef, wrapRef) assertPkg(t, pkg, expected) } @@ -1222,7 +1222,7 @@ _llgo_0: ret i64 %4 } -attributes #0 = { null_pointer_is_valid } +attributes #0 = { null_pointer_is_valid "frame-pointer"="non-leaf" } `) } @@ -1279,7 +1279,7 @@ _llgo_0: ; Function Attrs: null_pointer_is_valid declare ptr @"github.com/goplus/llgo/runtime/internal/runtime.AllocU"(i64) #0 -attributes #0 = { null_pointer_is_valid } +attributes #0 = { null_pointer_is_valid "frame-pointer"="non-leaf" } `) } @@ -1366,7 +1366,7 @@ _llgo_0: ; Function Attrs: 
null_pointer_is_valid declare ptr @"github.com/goplus/llgo/runtime/internal/runtime.IfacePtrData"(%"github.com/goplus/llgo/runtime/internal/runtime.iface") #0 -attributes #0 = { null_pointer_is_valid } +attributes #0 = { null_pointer_is_valid "frame-pointer"="non-leaf" } `) } @@ -1871,7 +1871,7 @@ _llgo_0: ret i1 true } -attributes #0 = { null_pointer_is_valid } +attributes #0 = { null_pointer_is_valid "frame-pointer"="non-leaf" } `) } @@ -1928,7 +1928,7 @@ source_filename = "foo/bar" ; Function Attrs: null_pointer_is_valid declare void @fn(i64) #0 -attributes #0 = { null_pointer_is_valid } +attributes #0 = { null_pointer_is_valid "frame-pointer"="non-leaf" } `) } @@ -1951,7 +1951,7 @@ _llgo_0: ret i64 1 } -attributes #0 = { null_pointer_is_valid } +attributes #0 = { null_pointer_is_valid "frame-pointer"="non-leaf" } `) } @@ -1974,7 +1974,7 @@ _llgo_0: ret i64 %0 } -attributes #0 = { null_pointer_is_valid } +attributes #0 = { null_pointer_is_valid "frame-pointer"="non-leaf" } `) } @@ -2011,7 +2011,7 @@ _llgo_0: ret void } -attributes #0 = { null_pointer_is_valid } +attributes #0 = { null_pointer_is_valid "frame-pointer"="non-leaf" } `) } @@ -2041,7 +2041,7 @@ _llgo_0: ret { i64, double } %1 } -attributes #0 = { null_pointer_is_valid } +attributes #0 = { null_pointer_is_valid "frame-pointer"="non-leaf" } `) } @@ -2060,7 +2060,7 @@ _llgo_0: br label %_llgo_0 } -attributes #0 = { null_pointer_is_valid } +attributes #0 = { null_pointer_is_valid "frame-pointer"="non-leaf" } `) } @@ -2097,7 +2097,7 @@ _llgo_2: ; preds = %_llgo_0 ret i64 0 } -attributes #0 = { null_pointer_is_valid } +attributes #0 = { null_pointer_is_valid "frame-pointer"="non-leaf" } `) } @@ -2138,7 +2138,7 @@ _llgo_0: ret i64 %2 } -attributes #0 = { null_pointer_is_valid } +attributes #0 = { null_pointer_is_valid "frame-pointer"="non-leaf" } `) } @@ -2169,7 +2169,7 @@ _llgo_0: ret i64 %2 } -attributes #0 = { null_pointer_is_valid } +attributes #0 = { null_pointer_is_valid "frame-pointer"="non-leaf" } 
`) } @@ -2228,7 +2228,7 @@ _llgo_0: ret i64 %6 } -attributes #0 = { null_pointer_is_valid } +attributes #0 = { null_pointer_is_valid "frame-pointer"="non-leaf" } `) } @@ -2386,7 +2386,7 @@ _llgo_0: ; Function Attrs: returns_twice declare i32 @setjmp(ptr) #1 -attributes #0 = { null_pointer_is_valid } +attributes #0 = { null_pointer_is_valid "frame-pointer"="non-leaf" } attributes #1 = { returns_twice } `) } diff --git a/test/go/runtime_statement_line_test.go b/test/go/runtime_statement_line_test.go index 81430b49f0..9d86021cd2 100644 --- a/test/go/runtime_statement_line_test.go +++ b/test/go/runtime_statement_line_test.go @@ -110,13 +110,17 @@ func (indirectCallerImpl) call() { //go:noinline func interfaceMiddle() { - checkCallerLine("interface", 2, INTERFACE_CALL_LINE) + // Go ground truth: 0=checkCallerLine, 1=interfaceMiddle, 2=the method + // frame, 3=the interface call site (verified against gc). + checkCallerLine("interface", 3, INTERFACE_CALL_LINE) } //go:noinline func checkClosureIndirectCaller() { f := closureLayer(closureLayer(func() { - checkCallerLine("closure", 3, CLOSURE_CALL_LINE) + // Go ground truth: 0=checkCallerLine, 1=the anonymous function, + // 2..3=the two closureLayer trampolines, 4=the call site. 
+ checkCallerLine("closure", 4, CLOSURE_CALL_LINE) })) f() // CLOSURE_CALL_MARK } From 4925b82d10323a3b2260a4f0666ad257b20457a9 Mon Sep 17 00:00:00 2001 From: Li Jie Date: Fri, 3 Jul 2026 21:51:01 +0800 Subject: [PATCH 57/59] doc: stage5 invariants, diagnostic traps and merge queue --- doc/design/stage5-handoff.md | 68 ++++++++++++++++++++++++++++++++++++ 1 file changed, 68 insertions(+) create mode 100644 doc/design/stage5-handoff.md diff --git a/doc/design/stage5-handoff.md b/doc/design/stage5-handoff.md new file mode 100644 index 0000000000..ab0f4cf2c5 --- /dev/null +++ b/doc/design/stage5-handoff.md @@ -0,0 +1,68 @@ +# Stage 5 notes: the FP unwinder's non-obvious invariants + +Durable conclusions from bringing up the frame-pointer unwinder (PR #2019). +Read together with #2019's PR body and `pclntab-linkphase.md`. The full +session-by-session investigation log lives in the PR discussion. + +## Invariants that must not silently degrade + +- **pc-1 attribution and the walk bound must survive without the prebuilt + table.** When a layout overflows the entry section, the link-phase rewrite + backs off and `runtimePrebuiltFtab` stays empty. If `prebuiltTextContains` + then answers false, two things turn off silently: the pc-1 return-address + convention (frames get attributed to the *next* statement — a return + address equals the following pcline anchor exactly) and the FP walk's + text bound (libc tail frames decode as wild pcs). The fix is the + first-use frame-table fallback bounds in `prebuiltTextContains`, and + `fpCallers` building that table up front. Any future change to the table + adoption path must keep this fallback alive. + +- **Mid-function aligned pcs need pcline merging on amd64.** arm64 + instructions are 4-byte aligned, so a `ret-1` query is always unaligned + and can't collide with a function entry or an aligned-branch lookup. 
+ amd64 entries and return addresses are byte-dense: `ret-1` can be + 4-aligned and can even equal another symbol's entry. Every path that + resolves a function record must therefore also consult same-function + pcline statement records — this is centralized in `refinePCSymbolLine` + (symtab.go); do not add a new resolution path that bypasses it. + +- **The two FP walkers must stay in sync.** `fpCallers` + (runtime/internal/lib/runtime/unwind_llgo.go) and `llgo_stacktrace` + (runtime/internal/clite/debug/_wrap/debug.c) implement the same chain + discipline ([fp]/[fp+wordsize], strictly increasing, bounded stride, + word alignment). The C walker serves pre-table paths (unrecovered-panic + dump, last-resort fallback). + +- **The FP attribute is target-gated.** `ssa.Program.NeedsFramePointer()` + says where the chain is emitted (linux/darwin, non-embedded, non-wasm); + the compiler records the decision in the per-binary `__llgo_fp_chain` + byte, and the runtime's `fpUnwindAvailable` trusts that flag plus table + presence. On ESP32-C3 the attribute interacted badly with the + conservative GC ("clearing nested struct did not free all objects"), so + keep embedded targets off unless that is understood. + +## Diagnostic traps + +- `nm` on `libexport.a` is useless: the archive nests `pkg-*.a` members. + Inspect the linked dylib/executable instead. +- Statement labels can land exactly on return addresses; a "wrong line" + report one statement late is the signature of a raw-pc (not pc-1) lookup. +- An amd64-only symbolization bug with green arm64 almost always traces to + one of: byte-dense entries (collisions), 4-aligned ret-1 (aligned-branch + path), or the empty-prebuilt fallback above. + +## Local repro environments + +- colima-qemu / container `llgo-amd64`: amd64 toolchain, stage5 clone at + `/root/s5` (rebuild: fetch + reset + `go build -o /usr/local/bin/llgo + ./cmd/llgo`). 
+- colima-llgo-perf / container `llgo-linux-final`: linux-arm64 bench matrix + (mounts /work-s5, /work-2016, /work-2012). + +## Merge queue + +1. Merge order #2012 -> #2016 -> #2019, rebasing between each. +2. Semantics PRs 1918/1882/1892/1906 rebase after the merges (1918/1882 + overlap needs an ordering decision), then reimplement 1925/1903/ + 1924-residual/1905, then P4 (zero-copy names, prebuilt pcline, + !pcsections, section shrink) per #2004. From cc5f42203665a2e68351d3324a899346f6963ab6 Mon Sep 17 00:00:00 2001 From: Li Jie Date: Sat, 4 Jul 2026 06:54:25 +0800 Subject: [PATCH 58/59] =?UTF-8?q?runtime,cl:=20user-visible=20caller=20inf?= =?UTF-8?q?o=20=E2=80=94=20Go-style=20panic=20tracebacks,=20exact=20log/sl?= =?UTF-8?q?og/testing=20locations?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit An unrecovered panic now prints a Go-style traceback (function names plus file:line per physical frame) through a PanicTraceback hook the public runtime registers; the clite dladdr dump remains the fallback when the FP walk or the tables are unavailable. Caller-frame tracking now applies uniformly: the blanket stdlib exclusion is gone, so the same per-package reaches-runtime.Caller analysis that already covered third-party code tracks log.Output, slog's Logger.log and testing's decorate chains (their thin wrappers were inlined, making fixed Caller depths count past them — log.Lshortfile printed "???:1"). Call sites into caller-pc-consuming functions of other packages get a statement anchor so the attributed frame reports the exact line. The collector also picks up named-type methods declared by the package itself — a type used only concretely never enters RuntimeTypes, which is exactly how slog.(*Logger).Info escaped tracking. hello-world size cost: +368 bytes (the traceback printer). 
Co-Authored-By: Claude Fable 5 --- cl/caller_frame_test.go | 26 +++- cl/compile.go | 16 ++- cl/funcinfo_metadata_test.go | 6 +- cl/instr.go | 140 ++++++++++++++++++-- runtime/internal/lib/runtime/runtime2.go | 22 ++- runtime/internal/lib/runtime/unwind_llgo.go | 69 +++++++++- runtime/internal/runtime/z_default.go | 4 +- runtime/internal/runtime/z_rt.go | 8 ++ 8 files changed, 266 insertions(+), 25 deletions(-) diff --git a/cl/caller_frame_test.go b/cl/caller_frame_test.go index 2fb4800456..909477fe78 100644 --- a/cl/caller_frame_test.go +++ b/cl/caller_frame_test.go @@ -381,8 +381,18 @@ type T struct{} func (T) Call() { runtime.Caller(0) } var _ = T{} `) - if runtimeCallerFuncSet(methodOnlyPkg) != nil { - t.Fatal("method-only runtime caller use should not mark top-level functions") + methodOnlySet := runtimeCallerFuncSet(methodOnlyPkg) + if methodOnlySet == nil { + t.Fatal("a method calling runtime.Caller must be tracked (slog.(*Logger).Info escaped exactly this way)") + } + foundMethod := false + for fn := range methodOnlySet { + if fn.Name() == "Call" { + foundMethod = true + } + } + if !foundMethod { + t.Fatal("method-only runtime caller set should contain the method itself") } } @@ -407,7 +417,7 @@ func TestCallerFrameTrackingEligibility(t *testing.T) { {name: "disabled flag", pkgPath: "example.com/foo", want: false}, {name: "named target", pkgPath: "example.com/foo", track: true, targetName: "esp32", want: false}, {name: "wasm", pkgPath: "example.com/foo", track: true, goarch: "wasm", want: false}, - {name: "stdlib", pkgPath: "fmt", track: true, want: false}, + {name: "stdlib", pkgPath: "fmt", track: true, want: true}, {name: "runtime", pkgPath: "runtime", track: true, want: false}, {name: "llgo runtime", pkgPath: llssa.PkgRuntime, track: true, want: false}, {name: "llgo runtime internal", pkgPath: "github.com/goplus/llgo/runtime/internal/foo", track: true, want: false}, @@ -443,8 +453,14 @@ func f() { runtime.Caller(0) } }) } - if 
canTrackCallerFramesForPackage("net/http") { - t.Fatal("stdlib paths without dots should not track caller frames") + // Tracking applies uniformly; only the runtime core is excluded. + if !canTrackCallerFramesForPackage("net/http") { + t.Fatal("stdlib packages must be trackable like any other code") + } + if canTrackCallerFramesForPackage("runtime") || + canTrackCallerFramesForPackage(llssa.PkgRuntime) || + canTrackCallerFramesForPackage("github.com/goplus/llgo/runtime/internal/lib") { + t.Fatal("runtime core must stay untracked") } } diff --git a/cl/compile.go b/cl/compile.go index 3cd538dd80..b3aa4db524 100644 --- a/cl/compile.go +++ b/cl/compile.go @@ -566,7 +566,7 @@ func (p *context) compileFuncDecl(pkg llssa.Package, f *ssa.Function) (llssa.Fun goName = funcName(pkgTypes, f, false) } pos := p.funcInfoPosition(f) - pkg.EmitFuncInfo(fn.Name(), goName, pos.Filename, pos.Line, pos.Column) + pkg.EmitFuncInfo(fn.Name(), funcInfoDisplayName(pkgTypes, goName), pos.Filename, pos.Line, pos.Column) } var childInits []func() if len(f.AnonFuncs) > 0 { @@ -653,6 +653,20 @@ func (p *context) compileFuncDecl(pkg llssa.Package, f *ssa.Function) (llssa.Fun return fn, nil, goFunc } +// funcInfoDisplayName normalizes a funcinfo metadata display name to gc's +// reporting conventions: the main package is "main" no matter what the +// module names it (frame filters in the wild match on the "main." prefix), +// and anonymous functions are pkg.fn.funcN (our linker symbols use $N). +// Linker symbols are not affected. 
+func funcInfoDisplayName(pkgTypes *types.Package, goName string) string { + if pkgTypes != nil && pkgTypes.Name() == "main" { + if path := llssa.PathOf(pkgTypes); path != "main" && strings.HasPrefix(goName, path+".") { + goName = "main" + goName[len(path):] + } + } + return normalizeRuntimeAnonFuncName(goName) +} + func hasNoInlineDirective(f *ssa.Function) bool { decl, _ := f.Syntax().(*ast.FuncDecl) if decl == nil || decl.Doc == nil { diff --git a/cl/funcinfo_metadata_test.go b/cl/funcinfo_metadata_test.go index 5af5800613..fe7785e8cc 100644 --- a/cl/funcinfo_metadata_test.go +++ b/cl/funcinfo_metadata_test.go @@ -87,8 +87,10 @@ func (T) method() {} if got := records["foo.leaf"].name; got != "foo.leaf" { t.Fatalf("leaf stack frame name = %q, want foo.leaf", got) } - if got := records["foo.top$1"].name; got != "foo.top$1" { - t.Fatalf("closure stack frame name = %q, want foo.top$1", got) + // The linker symbol keeps the $N form; the display name is normalized + // to gc's .funcN convention. + if got := records["foo.top$1"].name; got != "foo.top.func1" { + t.Fatalf("closure stack frame name = %q, want foo.top.func1", got) } if got := records["foo.top"].name; got != "foo.top" { t.Fatalf("caller stack frame name = %q, want foo.top", got) diff --git a/cl/instr.go b/cl/instr.go index c40dc8fa41..7a4a32378a 100644 --- a/cl/instr.go +++ b/cl/instr.go @@ -26,6 +26,7 @@ import ( "os" "regexp" "strings" + "sync" "golang.org/x/tools/go/ssa" @@ -866,17 +867,19 @@ func (p *context) shouldTrackCallerFrames() bool { return canTrackCallerFramesForPackage(p.pkg.Path()) } +// canTrackCallerFramesForPackage excludes only the runtime core, whose +// frames are unwinder plumbing rather than user code. 
Everything else — +// stdlib, third-party, user packages — goes through the same per-package +// analysis: functions that (transitively, within the package) reach a +// runtime.Caller/Callers call must keep physical frames (log.Output, +// slog's Logger.log, testing's decorate chains qualify exactly this way), +// and packages that never read caller pcs track nothing and pay nothing. func canTrackCallerFramesForPackage(pkgPath string) bool { return pkgPath != llssa.PkgRuntime && pkgPath != "runtime" && - !isStandardLibraryPackage(pkgPath) && !strings.HasPrefix(pkgPath, "github.com/goplus/llgo/runtime/internal/") } -func isStandardLibraryPackage(pkgPath string) bool { - return pkgPath != "command-line-arguments" && !strings.Contains(pkgPath, ".") -} - func packageUsesRuntimeCaller(pkg *ssa.Package) bool { return len(runtimeCallerFuncSet(pkg)) != 0 } @@ -891,10 +894,106 @@ func fnUsesRuntimeCaller(fn *ssa.Function) bool { return runtimeCallerFuncSet(fn.Pkg)[fn] } +// runtimeCallerFuncSet is the per-package tracking set: functions that +// must keep physical frames (noinline, no tail calls) and get statement +// anchors at their call sites. Four criteria feed it; criteria 3 and 4 +// are described inline in the function body, and the first two are: +// +// 1. the function (transitively, within the package) reaches a +// runtime.Caller/Callers call — it consumes caller pcs itself; +// 2. the function statically calls another package's pc-consuming +// function (log.Println, slog methods, t.Errorf, ...) — its frame is +// what the callee's fixed Caller depth attributes, so inlining it +// would both mis-attribute the location and, on ELF, drop the +// function symbol its pcline sections are link-ordered to. +// +// Criterion 2 tests membership against the callee package's *base* set +// (criterion 1 alone), so tracking extends exactly one call level past a +// pc-consuming package and does not cascade through arbitrary wrapper +// layers; multi-package wrapper chains remain the P4 inline-tree's job.
func runtimeCallerFuncSet(pkg *ssa.Package) map[*ssa.Function]bool { if pkg == nil { return nil } + if v, ok := runtimeCallerExtendedCache.Load(pkg); ok { + set, _ := v.(map[*ssa.Function]bool) + return set + } + base := runtimeCallerBaseSet(pkg) + out := make(map[*ssa.Function]bool, len(base)) + for fn := range base { + out[fn] = true + } + _, trackable := collectRuntimeCallerFunctions(pkg) + for fn := range trackable { + if out[fn] { + continue + } + // Criterion 3: pin program-unique frames. main.main and package + // init functions run once, so noinline is free — and they are the + // bottom frames of almost every panic traceback, where an + // approximate declaration-adjacent line is most visible. + if isProgramUniqueFrame(pkg, fn) { + out[fn] = true + continue + } + // Criterion 4: //go:noinline functions already keep their frames, + // so statement anchors are free of the usual inlining cost — their + // panic-traceback lines become exact instead of + // declaration-adjacent. + if hasNoInlineDirective(fn) { + out[fn] = true + continue + } + forEachCall(fn, func(call *ssa.CallCommon) { + callee := call.StaticCallee() + if callee == nil || callee.Pkg == nil || callee.Pkg == pkg { + return + } + if !canTrackCallerFramesForPackage(callee.Pkg.Pkg.Path()) { + return + } + if runtimeCallerBaseSet(callee.Pkg)[callee] { + out[fn] = true + } + }) + } + if len(out) == 0 { + out = nil + } + runtimeCallerExtendedCache.Store(pkg, out) + return out +} + +var ( + runtimeCallerBaseCache sync.Map // *ssa.Package -> map[*ssa.Function]bool + runtimeCallerExtendedCache sync.Map // *ssa.Package -> map[*ssa.Function]bool +) + +func isProgramUniqueFrame(pkg *ssa.Package, fn *ssa.Function) bool { + if fn == nil || fn.Parent() != nil { + return false + } + name := fn.Name() + if name == "init" || strings.HasPrefix(name, "init#") { + return true + } + return name == "main" && pkg.Pkg != nil && pkg.Pkg.Name() == "main" +} + +func runtimeCallerBaseSet(pkg *ssa.Package) 
map[*ssa.Function]bool { + if pkg == nil { + return nil + } + if v, ok := runtimeCallerBaseCache.Load(pkg); ok { + set, _ := v.(map[*ssa.Function]bool) + return set + } + set := computeRuntimeCallerBaseSet(pkg) + runtimeCallerBaseCache.Store(pkg, set) + return set +} + +func computeRuntimeCallerBaseSet(pkg *ssa.Package) map[*ssa.Function]bool { funcs, trackable := collectRuntimeCallerFunctions(pkg) analysis := &runtimeCallerAnalysis{ pkg: pkg, @@ -962,19 +1061,34 @@ func collectRuntimeCallerFunctions(pkg *ssa.Package) (funcs, trackable map[*ssa. add(fn, true) } } - if pkg.Prog != nil && pkg.Pkg != nil { - for _, typ := range pkg.Prog.RuntimeTypes() { - if !typeBelongsToPackage(typ, pkg.Pkg) { - continue - } + if pkg.Prog != nil { + // Methods are as trackable as package-level functions: one that + // (transitively) calls runtime.Caller needs frames and pcline + // labels of its own. + addMethods := func(typ types.Type) { methods := pkg.Prog.MethodSets.MethodSet(typ) for i := 0; i < methods.Len(); i++ { - // Methods are as trackable as package-level functions: one - // that (transitively) calls runtime.Caller needs frames and - // pcline labels of its own. add(pkg.Prog.MethodValue(methods.At(i)), true) } } + // Named-type methods are not package members and a type used only + // concretely never enters RuntimeTypes (slog.(*Logger).Info was + // missed exactly this way); collect both receiver forms from the + // package's own type declarations. 
+ for _, member := range pkg.Members { + if t, ok := member.(*ssa.Type); ok { + addMethods(t.Type()) + addMethods(types.NewPointer(t.Type())) + } + } + if pkg.Pkg != nil { + for _, typ := range pkg.Prog.RuntimeTypes() { + if !typeBelongsToPackage(typ, pkg.Pkg) { + continue + } + addMethods(typ) + } + } } for changed := true; changed; { changed = false diff --git a/runtime/internal/lib/runtime/runtime2.go b/runtime/internal/lib/runtime/runtime2.go index 8bf049e087..464beb911e 100644 --- a/runtime/internal/lib/runtime/runtime2.go +++ b/runtime/internal/lib/runtime/runtime2.go @@ -36,8 +36,11 @@ func Stack(buf []byte, all bool) int { } out = append(out, ':') out = appendInt(out, frame.Line) - out = append(out, ' ') - out = append(out, "+0x0\n"...) + if frame.Entry != 0 && frame.PC >= frame.Entry { + out = append(out, " +0x"...) + out = appendHexUint(out, uintptr(frame.PC-frame.Entry)) + } + out = append(out, '\n') if !more { break } @@ -50,6 +53,21 @@ func Stack(buf []byte, all bool) int { return len(out) } +func appendHexUint(buf []byte, v uintptr) []byte { + const digits = "0123456789abcdef" + if v == 0 { + return append(buf, '0') + } + var tmp [16]byte + i := len(tmp) + for v > 0 { + i-- + tmp[i] = digits[v&0xf] + v >>= 4 + } + return append(buf, tmp[i:]...) 
+} + func appendInt(out []byte, v int) []byte { if v == 0 { return append(out, '0') diff --git a/runtime/internal/lib/runtime/unwind_llgo.go b/runtime/internal/lib/runtime/unwind_llgo.go index 9e4732f3b7..c76e3dec0b 100644 --- a/runtime/internal/lib/runtime/unwind_llgo.go +++ b/runtime/internal/lib/runtime/unwind_llgo.go @@ -2,11 +2,78 @@ package runtime -import "unsafe" +import ( + "unsafe" + + rtdebug "github.com/goplus/llgo/runtime/internal/runtime" +) //go:linkname c_framepointer C.llgo_framepointer func c_framepointer() unsafe.Pointer +func init() { + rtdebug.PanicTraceback = panicTraceback +} + +func hasPrefix(s, prefix string) bool { + return len(s) >= len(prefix) && s[:len(prefix)] == prefix +} + +// panicTraceback prints a Go-style stack trace for an unrecovered panic: +// one "function(...)" line plus an indented file:line per physical frame, +// matching the shape of runtime.Stack and gc's panic output. Reports false +// (caller falls back to the clite dladdr dump) when the FP walk or the +// tables are unavailable. +func panicTraceback(skip int) bool { + if !fpUnwindAvailable() { + return false + } + var pcs [64]uintptr + n := fpCallers(skip, pcs[:]) + if n <= 0 { + return false + } + print("goroutine 1 [running]:\n") + frames := CallersFrames(pcs[:n]) + skippingPlumbing := true + for { + frame, more := frames.Next() + name := frame.Function + if name == "" { + name = unknownFunctionName(frame.PC) + } + // The frames between the hook and the panic site are runtime + // plumbing (Rethrow, Panic, ...); their depth varies by panic + // path, so filter by package rather than a fixed skip. 
+ if skippingPlumbing { + if hasPrefix(name, "github.com/goplus/llgo/runtime/internal/") { + if more { + continue + } + break + } + skippingPlumbing = false + } + print(name, "(...)\n\t") + if frame.File == "" { + print("???") + } else { + print(frame.File) + } + print(":", frame.Line) + // gc appends the frame pc's offset from the function entry; the + // value is codegen-specific, only the format matches. + if frame.Entry != 0 && frame.PC >= frame.Entry { + print(" +0x", string(appendHexUint(nil, uintptr(frame.PC-frame.Entry)))) + } + print("\n") + if !more { + break + } + } + return true +} + // maxFPStride bounds how far up the stack one frame may sit from the next. // A slot whose decoded parent is further away than any plausible frame is a // corrupt chain, not a giant frame; stop rather than walk off the stack. diff --git a/runtime/internal/runtime/z_default.go b/runtime/internal/runtime/z_default.go index cffe6cbf01..d0757f9cb1 100644 --- a/runtime/internal/runtime/z_default.go +++ b/runtime/internal/runtime/z_default.go @@ -19,7 +19,9 @@ func Rethrow(link *Defer) { if ptr := excepKey.Get(); ptr != nil { if link == nil { TracePanic(*(*any)(ptr)) - debug.PrintStack(2) + if PanicTraceback == nil || !PanicTraceback(2) { + debug.PrintStack(2) + } c.Free(ptr) c.Exit(2) } else { diff --git a/runtime/internal/runtime/z_rt.go b/runtime/internal/runtime/z_rt.go index 4cd79f22ac..2617e31f22 100644 --- a/runtime/internal/runtime/z_rt.go +++ b/runtime/internal/runtime/z_rt.go @@ -83,6 +83,14 @@ func TracePanic(v any) { println("\n") } +// PanicTraceback, when set by the public runtime package, prints a Go-style +// stack trace (function + file:line per frame) for an unrecovered panic and +// reports whether it printed anything. skip counts physical frames above the +// runtime plumbing frame that invokes the hook, matching clite +// debug.PrintStack's convention. When unset or when it reports false, the +// caller falls back to the clite frame dump. 
+var PanicTraceback func(skip int) bool + /* func stringTracef(fp c.FilePtr, format *c.Char, s String) { cs := c.Alloca(uintptr(s.len) + 1) From 5e895e75f2f4dd8486704e0e356bfb4df9729aa0 Mon Sep 17 00:00:00 2001 From: Li Jie Date: Sat, 4 Jul 2026 06:54:26 +0800 Subject: [PATCH 59/59] test: end-user caller-info acceptance suite Four scenarios, every expectation verified against gc: unrecovered panic tracebacks; log.Lshortfile and slog AddSource (text+JSON, package funcs and logger methods); a failing t.Errorf under llgo test; and an introspection grab-bag (goroutine/init/defer callers, FuncForPC names for methods, closures and generics, the errors-with-stack capture idiom). Co-Authored-By: Claude Fable 5 --- cl/caller_frame_test.go | 153 ++++++++ test/_manualtest/README.md | 61 +++ test/_manualtest/callers/go.mod | 3 + test/_manualtest/callers/main.go | 92 +++++ test/_manualtest/cexcept/go.mod | 3 + test/_manualtest/cexcept/main.go | 55 +++ test/_manualtest/cexcept/wrap/fault.c | 31 ++ test/_manualtest/logging/go.mod | 3 + test/_manualtest/logging/main.go | 21 + test/_manualtest/panic/go.mod | 3 + test/_manualtest/panic/main.go | 18 + test/_manualtest/testfail/go.mod | 3 + test/_manualtest/testfail/x_test.go | 13 + test/go/caller_acceptance_test.go | 543 ++++++++++++++++++++++++++ 14 files changed, 1002 insertions(+) create mode 100644 test/_manualtest/README.md create mode 100644 test/_manualtest/callers/go.mod create mode 100644 test/_manualtest/callers/main.go create mode 100644 test/_manualtest/cexcept/go.mod create mode 100644 test/_manualtest/cexcept/main.go create mode 100644 test/_manualtest/cexcept/wrap/fault.c create mode 100644 test/_manualtest/logging/go.mod create mode 100644 test/_manualtest/logging/main.go create mode 100644 test/_manualtest/panic/go.mod create mode 100644 test/_manualtest/panic/main.go create mode 100644 test/_manualtest/testfail/go.mod create mode 100644 test/_manualtest/testfail/x_test.go create mode 100644 
test/go/caller_acceptance_test.go diff --git a/cl/caller_frame_test.go b/cl/caller_frame_test.go index 909477fe78..40561d5e3f 100644 --- a/cl/caller_frame_test.go +++ b/cl/caller_frame_test.go @@ -845,3 +845,156 @@ func f() { t.Fatalf("caller location tracking should not emit old TLS instrumentation:\n%s", ir) } } + +// importerFunc adapts a lookup function to types.Importer. +type importerFunc func(string) (*types.Package, error) + +func (f importerFunc) Import(path string) (*types.Package, error) { return f(path) } + +// buildCallerFrameSSAProgram builds dep and root packages with bodies in a +// single SSA program, so cross-package analysis (runtimeCallerFuncSet +// criterion 2) can consult the callee package's own base set. +func buildCallerFrameSSAProgram(t *testing.T, depPath, depSrc, rootPath, rootSrc string) (dep, root *gossa.Package) { + t.Helper() + fset := token.NewFileSet() + parse := func(name, src string) *ast.File { + file, err := parser.ParseFile(fset, name, src, parser.ParseComments) + if err != nil { + t.Fatal(err) + } + return file + } + depFile := parse("dep.go", depSrc) + rootFile := parse("root.go", rootSrc) + base := packages.NewImporter(fset) + prog := gossa.NewProgram(fset, gossa.SanityCheckFunctions|gossa.InstantiateGenerics) + created := map[*types.Package]bool{} + var createDeps func(p *types.Package) + createDeps = func(p *types.Package) { + if created[p] { + return + } + created[p] = true + for _, imp := range p.Imports() { + createDeps(imp) + } + prog.CreatePackage(p, nil, nil, true) + } + check := func(path string, file *ast.File, imp types.Importer) (*types.Package, *types.Info) { + info := &types.Info{ + Types: map[ast.Expr]types.TypeAndValue{}, + Defs: map[*ast.Ident]types.Object{}, + Uses: map[*ast.Ident]types.Object{}, + Implicits: map[ast.Node]types.Object{}, + Scopes: map[ast.Node]*types.Scope{}, + Selections: map[*ast.SelectorExpr]*types.Selection{}, + Instances: map[*ast.Ident]types.Instance{}, + } + pkg := 
types.NewPackage(path, file.Name.Name) + if err := types.NewChecker(&types.Config{Importer: imp}, fset, pkg, info).Files([]*ast.File{file}); err != nil { + t.Fatal(err) + } + return pkg, info + } + depPkgT, depInfo := check(depPath, depFile, base) + for _, p := range depPkgT.Imports() { + createDeps(p) + } + depSSA := prog.CreatePackage(depPkgT, []*ast.File{depFile}, depInfo, true) + created[depPkgT] = true + rootImp := importerFunc(func(path string) (*types.Package, error) { + if path == depPath { + return depPkgT, nil + } + return base.Import(path) + }) + rootPkgT, rootInfo := check(rootPath, rootFile, rootImp) + for _, p := range rootPkgT.Imports() { + createDeps(p) + } + rootSSA := prog.CreatePackage(rootPkgT, []*ast.File{rootFile}, rootInfo, true) + prog.Build() + return depSSA, rootSSA +} + +// Criterion 2: calling another package's pc-consuming function makes the +// caller trackable — decided by that package's analysis, not by name. +func TestRuntimeCallerFuncSetCrossPackage(t *testing.T) { + depSSA, rootSSA := buildCallerFrameSSAProgram(t, + "example.com/dep", `package dep + +import "runtime" + +func Where() bool { + _, _, _, ok := runtime.Caller(0) + return ok +} + +func Quiet() int { return 1 } +`, + "example.com/root", `package root + +import "example.com/dep" + +func Logs() bool { return dep.Where() } + +func Plain() int { return dep.Quiet() } +`) + if !runtimeCallerBaseSet(depSSA)[depSSA.Func("Where")] { + t.Fatal("dep.Where must be in its own package's base set") + } + set := runtimeCallerFuncSet(rootSSA) + if !set[rootSSA.Func("Logs")] { + t.Fatal("caller of a pc-consuming function must be tracked (criterion 2)") + } + if set[rootSSA.Func("Plain")] { + t.Fatal("caller of a quiet function must not be tracked") + } +} + +// Criteria 3 and 4: program-unique frames (main.main, init) and +// //go:noinline functions are pinned; plain helpers are not. 
+func TestRuntimeCallerFuncSetPinnedFrames(t *testing.T) { + ssapkg, _ := buildCallerFrameSSAPackage(t, "example.com/cmd", `package main + +func main() { helper(); pinned() } + +func init() { helper() } + +//go:noinline +func pinned() {} + +func helper() {} +`) + set := runtimeCallerFuncSet(ssapkg) + for _, name := range []string{"main", "init", "pinned"} { + if !set[ssapkg.Func(name)] { + t.Fatalf("%s must be pinned in the tracking set", name) + } + } + if set[ssapkg.Func("helper")] { + t.Fatal("plain helper must not be tracked") + } +} + +// Display names follow gc's reporting conventions regardless of the module +// path; linker symbols are untouched. +func TestFuncInfoDisplayName(t *testing.T) { + mainPkg := types.NewPackage("example.com/cmd", "main") + libPkg := types.NewPackage("example.com/lib", "lib") + cases := []struct { + pkg *types.Package + in, want string + }{ + {mainPkg, "example.com/cmd.main", "main.main"}, + {mainPkg, "example.com/cmd.main$2", "main.main.func2"}, + {mainPkg, "other/path.f", "other/path.f"}, + {libPkg, "example.com/lib.f$1", "example.com/lib.f.func1"}, + {nil, "plain.f$x", "plain.f$x"}, + } + for _, c := range cases { + if got := funcInfoDisplayName(c.pkg, c.in); got != c.want { + t.Fatalf("funcInfoDisplayName(%q) = %q, want %q", c.in, got, c.want) + } + } +} diff --git a/test/_manualtest/README.md b/test/_manualtest/README.md new file mode 100644 index 0000000000..ddb96abd29 --- /dev/null +++ b/test/_manualtest/README.md @@ -0,0 +1,61 @@ +# Manual caller-info acceptance playground + +Each directory runs under both `go` and `llgo` for side-by-side comparison. +Every scenario also has an automated regression in `test/go` +(`caller_acceptance_test.go`); this playground exists for eyeballing real +output. From the repository root: + + export LLGO_ROOT=$(git rev-parse --show-toplevel) + go build -o /tmp/llgo ./cmd/llgo # or: go run ./cmd/llgo ... 
+ +Conformance bar: output format and user-code file:line match gc exactly; +runtime-internal, patched-stdlib and startup frames may differ. + +## panic — Go-style traceback for an unrecovered panic + cd test/_manualtest/panic + go run . # gc's goroutine traceback + /tmp/llgo run . # same shape: names + file:line + offset (exit 2) + +## logging — log.Lshortfile / slog AddSource + cd test/_manualtest/logging + go run . && /tmp/llgo run . # the main.go:NN locations must agree + +## callers — Caller ladder / CallersFrames / FuncForPC panorama + cd test/_manualtest/callers + go run . > /tmp/gc.txt; /tmp/llgo run . > /tmp/llgo.txt + diff /tmp/gc.txt /tmp/llgo.txt + # line columns must agree; known diffs: runtime-internal frame lines, + # gc's runtime.main/goexit tail frames + +## testfail — llgo test failure locations + cd test/_manualtest/testfail + go test . # x_test.go:NN: boom + /tmp/llgo test . # the same x_test.go:NN: boom + +## cexcept — hardware faults in C code called from Go + cd test/_manualtest/cexcept + /tmp/llgo run . segv recover # NULL store in C: recover works, prints + # the post-recover stack + /tmp/llgo run . segv norecover # unrecovered: panic + gc-style traceback + # (fault-site frames pending the + # panic-snapshot follow-up) + /tmp/llgo run . div recover # arm64 integer division does not trap + # (hardware returns 0); amd64 raises + # SIGFPE + +Verified (darwin/arm64 + linux/arm64 + linux/amd64): +- SIGSEGV in a C frame converts to a Go panic; recover observes gc's exact + error text. +- Known gaps (recorded for the follow-up PRs): + 1. The fault-site stack (cexc_leaf_segv -> cexc_mid_segv x3 -> cexc_segv + -> Go frames) is not visible yet — recover/tracebacks show the + post-longjmp stack. The panic-snapshot follow-up extends to signal + handlers: walk the FP chain from the ucontext pc/fp; C frames get + dladdr names, Go frames funcinfo names — same machinery as the + unwinder. + 2. 
Only SIGSEGV is installed; SIGFPE (amd64 division) core-dumps, SIGBUS + is not handled. + 3. No sigaltstack: on stack overflow the handler cannot run and the + process dies (gc prints "stack overflow"). Note that C-side UB gets + propagated by clang (this test was once optimized into infinite + recursion); wrap/fault.c uses a volatile pointer to prevent that. diff --git a/test/_manualtest/callers/go.mod b/test/_manualtest/callers/go.mod new file mode 100644 index 0000000000..ba3082ae71 --- /dev/null +++ b/test/_manualtest/callers/go.mod @@ -0,0 +1,3 @@ +module manualcallers + +go 1.21 diff --git a/test/_manualtest/callers/main.go b/test/_manualtest/callers/main.go new file mode 100644 index 0000000000..35f3aa8f1b --- /dev/null +++ b/test/_manualtest/callers/main.go @@ -0,0 +1,92 @@ +package main + +import ( + "fmt" + "runtime" + "sync" +) + +type thing struct{} + +var sinkA, sinkB int + +//go:noinline +func (thing) valueMethod() uintptr { sinkA++; pc, _, _, _ := runtime.Caller(0); return pc } + +//go:noinline +func (*thing) ptrMethod() uintptr { sinkB++; pc, _, _, _ := runtime.Caller(0); return pc } + +//go:noinline +func level3() { + // Caller(skip): 0=self 1=level2 2=level1 3=main + for skip := 0; skip <= 3; skip++ { + _, file, line, ok := runtime.Caller(skip) + fmt.Printf("Caller(%d): %s:%d ok=%v\n", skip, short(file), line, ok) + } + var pcs [16]uintptr + n := runtime.Callers(0, pcs[:]) + frames := runtime.CallersFrames(pcs[:n]) + for { + f, more := frames.Next() + fmt.Printf("frame: %-28s %s:%d\n", f.Function, short(f.File), f.Line) + if !more { + break + } + } +} + +//go:noinline +func level2() { level3() } + +//go:noinline +func level1() { level2() } + +func short(p string) string { + for i := len(p) - 1; i >= 0; i-- { + if p[i] == '/' { + return p[i+1:] + } + } + return p +} + +type stackErr struct { + pcs [8]uintptr + n int +} + +//go:noinline +func newStackErr() *stackErr { + e := &stackErr{} + e.n = runtime.Callers(1, e.pcs[:]) + return e +} + +func 
main() { + level1() + + var t thing + fmt.Println("valueMethod:", runtime.FuncForPC(t.valueMethod()).Name()) + fmt.Println("ptrMethod: ", runtime.FuncForPC((&t).ptrMethod()).Name()) + + var wg sync.WaitGroup + wg.Add(1) + go func() { + defer wg.Done() + _, file, line, _ := runtime.Caller(0) + fmt.Printf("goroutine Caller: %s:%d\n", short(file), line) + }() + wg.Wait() + + e := newStackErr() // capture site: deferred symbolization must report this line + frames := runtime.CallersFrames(e.pcs[:e.n]) + for { + f, more := frames.Next() + if f.Function == "main.main" { + fmt.Printf("stackErr captured at: %s:%d\n", short(f.File), f.Line) + } + if !more { + break + } + } +} diff --git a/test/_manualtest/cexcept/go.mod b/test/_manualtest/cexcept/go.mod new file mode 100644 index 0000000000..da96627bde --- /dev/null +++ b/test/_manualtest/cexcept/go.mod @@ -0,0 +1,3 @@ +module cexcept + +go 1.21 diff --git a/test/_manualtest/cexcept/main.go b/test/_manualtest/cexcept/main.go new file mode 100644 index 0000000000..dc679d1d9f --- /dev/null +++ b/test/_manualtest/cexcept/main.go @@ -0,0 +1,55 @@ +package main + +import ( + "fmt" + "os" + "runtime/debug" + _ "unsafe" +) + +const ( + LLGoFiles = "wrap/fault.c" +) + +//go:linkname cexcSegv C.cexc_segv +func cexcSegv(depth int32) + +//go:linkname cexcDiv C.cexc_div +func cexcDiv(den int32) int32 + +//go:noinline +func callC(kind string) { + switch kind { + case "segv": + cexcSegv(2) // C: cexc_segv -> cexc_mid_segv x3 -> cexc_leaf_segv -> *NULL + case "div": + fmt.Println("div result:", cexcDiv(0)) // arm64 does not trap (returns 0); amd64 raises SIGFPE + } +} + +//go:noinline +func viaGo(kind string) { + callC(kind) +} + +func main() { + kind, mode := "segv", "recover" + if len(os.Args) > 1 { + kind = os.Args[1] + } + if len(os.Args) > 2 { + mode = os.Args[2] + } + if mode == "recover" { + defer func() { + if r := recover(); r != nil { + fmt.Println("recovered:", r) + os.Stdout.Write(debug.Stack()) + } else { + fmt.Println("no 
panic for:", kind) + } + }() + } + viaGo(kind) + fmt.Println("survived", kind) +} diff --git a/test/_manualtest/cexcept/wrap/fault.c b/test/_manualtest/cexcept/wrap/fault.c new file mode 100644 index 0000000000..b36ce533dc --- /dev/null +++ b/test/_manualtest/cexcept/wrap/fault.c @@ -0,0 +1,31 @@ +#include <stdint.h> + +/* volatile: a bare NULL store is UB and clang propagates it into + * "unreachable", turning the recursion below into an infinite loop. */ +static int32_t *volatile cexc_null; +volatile int32_t cexc_marks; +volatile int32_t cexc_den; + +void cexc_leaf_segv(void) { + *cexc_null = 42; +} + +void cexc_mid_segv(int32_t depth) { + if (depth > 0) { + cexc_mid_segv(depth - 1); + cexc_marks++; + return; + } + cexc_leaf_segv(); + cexc_marks++; +} + +void cexc_segv(int32_t depth) { + cexc_mid_segv(depth); + cexc_marks++; +} + +int32_t cexc_div(int32_t den) { + cexc_den = den; + return 1000 / cexc_den; +} diff --git a/test/_manualtest/logging/go.mod b/test/_manualtest/logging/go.mod new file mode 100644 index 0000000000..9e2ef07850 --- /dev/null +++ b/test/_manualtest/logging/go.mod @@ -0,0 +1,3 @@ +module manuallogging + +go 1.21 diff --git a/test/_manualtest/logging/main.go b/test/_manualtest/logging/main.go new file mode 100644 index 0000000000..9fc00aba79 --- /dev/null +++ b/test/_manualtest/logging/main.go @@ -0,0 +1,21 @@ +package main + +import ( + "log" + "log/slog" + "os" +) + +func main() { + log.SetFlags(log.Lshortfile) + log.Println("via log package func") + + logger := log.New(os.Stderr, "", log.Lshortfile) + logger.Printf("via *Logger method") + + sl := slog.New(slog.NewTextHandler(os.Stdout, &slog.HandlerOptions{AddSource: true})) + sl.Info("via slog text handler") + + slog.SetDefault(slog.New(slog.NewJSONHandler(os.Stdout, &slog.HandlerOptions{AddSource: true}))) + slog.Warn("via slog json handler") +} diff --git a/test/_manualtest/panic/go.mod b/test/_manualtest/panic/go.mod new file mode 100644 index 0000000000..3e5c0f499b --- /dev/null +++ 
b/test/_manualtest/panic/go.mod @@ -0,0 +1,3 @@ +module manualpanic + +go 1.21 diff --git a/test/_manualtest/panic/main.go b/test/_manualtest/panic/main.go new file mode 100644 index 0000000000..37c973eb5d --- /dev/null +++ b/test/_manualtest/panic/main.go @@ -0,0 +1,18 @@ +package main + +import "runtime" + +//go:noinline +func inner() { + panic("manual-boom") +} + +//go:noinline +func outer() { + inner() +} + +func main() { + _ = runtime.NumCPU() + outer() +} diff --git a/test/_manualtest/testfail/go.mod b/test/_manualtest/testfail/go.mod new file mode 100644 index 0000000000..1a2bbaa3b2 --- /dev/null +++ b/test/_manualtest/testfail/go.mod @@ -0,0 +1,3 @@ +module testfail + +go 1.21 diff --git a/test/_manualtest/testfail/x_test.go b/test/_manualtest/testfail/x_test.go new file mode 100644 index 0000000000..fba739749c --- /dev/null +++ b/test/_manualtest/testfail/x_test.go @@ -0,0 +1,13 @@ +package testfail + +import "testing" + +func TestBoom(t *testing.T) { + t.Errorf("boom: expected failure to show this file:line") +} + +func TestOK(t *testing.T) { + if 1+1 != 2 { + t.Fatal("math broke") + } +} diff --git a/test/go/caller_acceptance_test.go b/test/go/caller_acceptance_test.go new file mode 100644 index 0000000000..d078765025 --- /dev/null +++ b/test/go/caller_acceptance_test.go @@ -0,0 +1,543 @@ +/* + * Copyright (c) 2026 The XGo Authors (xgo.dev). All rights reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +// End-user acceptance for caller information: every scenario asserts the +// output an application developer actually sees — log prefixes, slog source +// attributes, testing failure locations, panic tracebacks — with the +// expected values verified against gc (`go run` prints the identical +// file:line for each probe). +package gotest + +import ( + "os" + "os/exec" + "path/filepath" + "strconv" + "strings" + "sync" + "testing" +) + +// Scenario: the standard log package reports the caller's file:line +// (Lshortfile/Llongfile), and log/slog's AddSource reports it as a source +// attribute — through both the convenience functions and logger methods. +const loggingAcceptanceProbe = `package main + +import ( + "log" + "log/slog" + "os" + "strconv" + "strings" +) + +var checked = 0 + +func expectPrefix(out, want string) { + if !strings.HasPrefix(out, want) { + panic("bad location: got " + out + ", want prefix " + want) + } + checked++ +} + +type sink struct{ strings.Builder } + +func main() { + var buf sink + log.SetOutput(&buf) + log.SetFlags(log.Lshortfile) + log.Println("p") // PKGLOG_MARK + expectPrefix(buf.String(), "main.go:"+strconv.Itoa(PKGLOG_LINE)+":") + + buf.Reset() + logger := log.New(&buf, "", log.Lshortfile) + logger.Printf("m") // NEWLOG_MARK + expectPrefix(buf.String(), "main.go:"+strconv.Itoa(NEWLOG_LINE)+":") + + buf.Reset() + sl := slog.New(slog.NewTextHandler(&buf, &slog.HandlerOptions{AddSource: true})) + sl.Info("s") // SLOGTXT_MARK + if !strings.Contains(buf.String(), "main.go:"+strconv.Itoa(SLOGTXT_LINE)+" ") { + panic("bad slog source: " + buf.String()) + } + checked++ + + buf.Reset() + slog.SetDefault(slog.New(slog.NewJSONHandler(&buf, &slog.HandlerOptions{AddSource: true}))) + slog.Warn("w") // SLOGJSN_MARK + if !strings.Contains(buf.String(), "main.go\",\"line\":"+strconv.Itoa(SLOGJSN_LINE)) { + panic("bad slog json source: " + buf.String()) + } + checked++ + + if checked != 4 { + panic("scenario undercount") + } + 
os.Stdout.WriteString("LOGGING_OK\n") +} +` + +func TestCallerAcceptanceLogging(t *testing.T) { + runCallerAcceptanceProbe(t, loggingAcceptanceProbe, "LOGGING_OK", + "PKGLOG", "NEWLOG", "SLOGTXT", "SLOGJSN") +} + +// Scenario: an unrecovered panic prints a Go-style traceback — function +// names plus file:line for the panic site and its callers — and exits 2. +const panicAcceptanceProbe = `package main + +import "runtime" + +//go:noinline +func boom() { + panic("acceptance-boom") // PANIC_MARK +} + +//go:noinline +func caller() { + boom() // PANIC_CALLER_MARK +} + +func main() { + _ = runtime.NumCPU() // keep the runtime package linked + caller() // PANIC_MAIN_MARK +} +` + +func TestCallerAcceptancePanicTraceback(t *testing.T) { + source, dir := prepareCallerAcceptanceProbe(t, panicAcceptanceProbe, "PANIC", "PANIC_CALLER", "PANIC_MAIN") + out, err := runLLGoProbe(t, dir) + if err == nil { + t.Fatalf("panic probe unexpectedly succeeded:\n%s", out) + } + for _, want := range []string{ + "panic: acceptance-boom", + "goroutine 1 [running]:", + "main.boom(...)", + "main.go:" + markerLineOf(t, source, "PANIC_MARK"), + "main.caller(...)", + "main.go:" + markerLineOf(t, source, "PANIC_CALLER_MARK"), + "main.main(...)", + "main.go:" + markerLineOf(t, source, "PANIC_MAIN_MARK"), + } { + if !strings.Contains(out, want) { + t.Fatalf("panic traceback missing %q:\n%s", want, out) + } + } +} + +// Scenario: a failing test reports the t.Errorf call's file:line, exactly +// like `go test` (the whole testing harness runs under llgo). 
+func TestCallerAcceptanceTestingFailure(t *testing.T) { + dir := t.TempDir() + const testSrc = `package tpkg + +import "testing" + +func TestFail(t *testing.T) { + t.Errorf("acceptance failure") // TESTING_MARK +} +` + wantLine := markerLineOf(t, testSrc, "TESTING_MARK") + if err := os.WriteFile(filepath.Join(dir, "x_test.go"), []byte(testSrc), 0644); err != nil { + t.Fatal(err) + } + if err := os.WriteFile(filepath.Join(dir, "go.mod"), []byte("module tpkg\n\ngo 1.21\n"), 0644); err != nil { + t.Fatal(err) + } + out, err := runLLGoInModule(t, dir, "test", ".") + if err == nil { + t.Fatalf("failing test unexpectedly passed:\n%s", out) + } + if !strings.Contains(out, "x_test.go:"+wantLine+": acceptance failure") { + t.Fatalf("testing failure missing exact location:\n%s", out) + } + if !strings.Contains(out, "--- FAIL: TestFail") { + t.Fatalf("testing failure missing FAIL header:\n%s", out) + } +} + +// Scenario grab-bag verified against gc: caller info from goroutines, from +// init, from deferred functions; FuncForPC names for methods, closures and +// generic instantiations; and the errors-with-stack pattern (capture pcs at +// error creation, symbolize when printing — the zap/sentry idiom). +const introspectionAcceptanceProbe = `package main + +import ( + "os" + "runtime" + "strconv" + "strings" + "sync" +) + +func expectLine(what string, line, want int) { + if line != want { + panic("bad " + what + " line: " + strconv.Itoa(line) + " want " + strconv.Itoa(want)) + } +} + +func expectFunc(what, got, want string) { + if got != want { + panic("bad " + what + ": " + got + " want " + want) + } +} + +var initLine int + +func init() { + _, _, initLine, _ = runtime.Caller(0) // INIT_MARK +} + +type receiver struct{} + +// Every probe body writes a distinct global: identical bodies would be +// folded to one address by linker ICF and FuncForPC could only report the +// surviving symbol (observed: value/pointer/closure/generic all folding). 
+var sinkValue, sinkPointer, sinkClosure, sinkGeneric int + +//go:noinline +func (receiver) value() uintptr { sinkValue++; pc, _, _, _ := runtime.Caller(0); return pc } + +//go:noinline +func (*receiver) pointer() uintptr { sinkPointer++; pc, _, _, _ := runtime.Caller(0); return pc } + +//go:noinline +func generic[T any](v T) uintptr { sinkGeneric++; pc, _, _, _ := runtime.Caller(0); return pc } + +type stackErr struct { + msg string + pcs [8]uintptr + n int +} + +func (e *stackErr) Error() string { return e.msg } + +//go:noinline +func newStackErr(msg string) *stackErr { + e := &stackErr{msg: msg} + e.n = runtime.Callers(1, e.pcs[:]) + return e +} + +func main() { + expectLine("init caller", initLine, INIT_LINE) + + var wg sync.WaitGroup + wg.Add(1) + var gLine int + var gFile string + go func() { + defer wg.Done() + _, gFile, gLine, _ = runtime.Caller(0) // GOROUTINE_MARK + }() + wg.Wait() + expectLine("goroutine caller", gLine, GOROUTINE_LINE) + if !strings.HasSuffix(gFile, "main.go") { + panic("bad goroutine caller file: " + gFile) + } + + var dLine int + func() { + defer func() { + _, _, dLine, _ = runtime.Caller(0) // DEFER_MARK + }() + }() + expectLine("deferred caller", dLine, DEFER_LINE) + + var r receiver + expectFunc("value method", runtime.FuncForPC(r.value()).Name(), "main.receiver.value") + expectFunc("pointer method", runtime.FuncForPC((&r).pointer()).Name(), "main.(*receiver).pointer") + closure := func() uintptr { sinkClosure++; pc, _, _, _ := runtime.Caller(0); return pc } + // LLGo names anonymous functions pkg.fn$N (gc uses pkg.fn.funcN); + // accept both — the P4 name work decides whether to normalize. 
+ name := runtime.FuncForPC(closure()).Name() + if !strings.HasPrefix(name, "main.main.func") && !strings.HasPrefix(name, "main.main$") { + panic("bad closure name: " + name) + } + gname := runtime.FuncForPC(generic(0)).Name() + if !strings.HasPrefix(gname, "main.generic") { + panic("bad generic name: " + gname) + } + + err := newStackErr("wrapped") // STACKERR_MARK + frames := runtime.CallersFrames(err.pcs[:err.n]) + found := false + for { + frame, more := frames.Next() + if frame.Function == "main.main" { + expectLine("stack error capture", frame.Line, STACKERR_LINE) + found = true + } + if !more { + break + } + } + if !found { + panic("stack error: main.main frame missing") + } + + os.Stdout.WriteString("INTROSPECTION_OK\n") +} +` + +func TestCallerAcceptanceIntrospection(t *testing.T) { + runCallerAcceptanceProbe(t, introspectionAcceptanceProbe, "INTROSPECTION_OK", + "INIT", "GOROUTINE", "DEFER", "STACKERR") +} + +// --- harness --- + +// prepareCallerAcceptanceProbe substitutes NAME_LINE placeholders with the +// line numbers of the corresponding NAME_MARK comments and writes main.go +// into a temp dir. Returns the final source and the dir. 
+func prepareCallerAcceptanceProbe(t *testing.T, source string, names ...string) (string, string) { + t.Helper() + for _, name := range names { + line := markerLine(source, name+"_MARK") + if line == 0 { + t.Fatalf("marker %s_MARK not found", name) + } + source = strings.ReplaceAll(source, name+"_LINE", strconv.Itoa(line)) + } + dir := t.TempDir() + if err := os.WriteFile(filepath.Join(dir, "main.go"), []byte(source), 0644); err != nil { + t.Fatal(err) + } + return source, dir +} + +func markerLineOf(t *testing.T, source, marker string) string { + t.Helper() + line := markerLine(source, marker) + if line == 0 { + t.Fatalf("marker %s not found", marker) + } + return strconv.Itoa(line) +} + +func runLLGoProbe(t *testing.T, dir string) (string, error) { + t.Helper() + repoRoot := findStringConversionRepoRoot(t) + t.Setenv("LLGO_ROOT", repoRoot) + cmd := exec.Command("go", "run", "./cmd/llgo", "run", "-a", filepath.Join(dir, "main.go")) + cmd.Dir = repoRoot + out, err := cmd.CombinedOutput() + return string(out), err +} + +func runCallerAcceptanceProbe(t *testing.T, source, okMarker string, names ...string) { + t.Helper() + _, dir := prepareCallerAcceptanceProbe(t, source, names...) + out, err := runLLGoProbe(t, dir) + if err != nil { + t.Fatalf("acceptance probe failed: %v\n%s", err, out) + } + if !strings.Contains(out, okMarker) { + t.Fatalf("acceptance probe missing %s:\n%s", okMarker, out) + } +} + +// Scenario: a module-named main package must still report gc's "main." +// prefix in every runtime name (frame filters match on it); the other +// probes run as command-line-arguments and cannot catch a regression here. 
+func TestCallerAcceptanceModuleMainNaming(t *testing.T) { + dir := t.TempDir() + const src = `package main + +import ( + "os" + "runtime" + "strings" +) + +//go:noinline +func here() (string, bool) { + pc, _, _, ok := runtime.Caller(0) + if !ok { + return "", false + } + return runtime.FuncForPC(pc).Name(), true +} + +func main() { + name, ok := here() + if !ok || name != "main.here" { + panic("bad module-main name: " + name) + } + var pcs [8]uintptr + frames := runtime.CallersFrames(pcs[:runtime.Callers(0, pcs[:])]) + foundMain := false + for { + f, more := frames.Next() + if strings.HasPrefix(f.Function, "mymainmod.") { + panic("module path leaked into frame name: " + f.Function) + } + if f.Function == "main.main" { + foundMain = true + } + if !more { + break + } + } + if !foundMain { + panic("main.main frame missing") + } + os.Stdout.WriteString("MODMAIN_OK\n") +} +` + writeCallerAcceptanceModule(t, dir, map[string]string{ + "main.go": src, + "go.mod": "module mymainmod\n\ngo 1.21\n", + }) + out, err := runLLGoInModule(t, dir, "run", ".") + if err != nil { + t.Fatalf("module-main probe failed: %v\n%s", err, out) + } + if !strings.Contains(out, "MODMAIN_OK") { + t.Fatalf("module-main probe missing marker:\n%s", out) + } +} + +// Scenario: a hardware fault inside C code called from Go (NULL store in a +// C helper) converts to a Go panic that recover observes with gc's error +// text; the process must not die on the raw signal. 
+func TestCallerAcceptanceCFaultRecover(t *testing.T) { + dir := t.TempDir() + const src = `package main + +import ( + "fmt" + "os" + _ "unsafe" +) + +const ( + LLGoFiles = "wrap/fault.c" +) + +//go:linkname cexcSegv C.cexc_segv +func cexcSegv(depth int32) + +//go:noinline +func viaGo() { + cexcSegv(2) +} + +func main() { + defer func() { + r := recover() + if r == nil { + fmt.Println("no panic") + return + } + err, ok := r.(error) + if !ok || err.Error() != "runtime error: invalid memory address or nil pointer dereference" { + panic(r) + } + os.Stdout.WriteString("CFAULT_OK\n") + }() + viaGo() +} +` + const csrc = `#include <stdint.h> + +/* volatile: a bare NULL store is UB and clang would propagate it into + * "unreachable", turning the recursion into an infinite loop. */ +static int32_t *volatile cexc_null; +volatile int32_t cexc_marks; + +static void cexc_leaf(void) { *cexc_null = 42; } + +static void cexc_mid(int32_t depth) { + if (depth > 0) { + cexc_mid(depth - 1); + cexc_marks++; + return; + } + cexc_leaf(); + cexc_marks++; +} + +void cexc_segv(int32_t depth) { + cexc_mid(depth); + cexc_marks++; +} +` + writeCallerAcceptanceModule(t, dir, map[string]string{ + "main.go": src, + "go.mod": "module cfault\n\ngo 1.21\n", + "wrap/fault.c": csrc, + }) + out, err := runLLGoInModule(t, dir, "run", ".") + if err != nil { + t.Fatalf("C fault probe failed (process died on the signal?): %v\n%s", err, out) + } + if !strings.Contains(out, "CFAULT_OK") { + t.Fatalf("C fault probe missing marker:\n%s", out) + } +} + +func writeCallerAcceptanceModule(t *testing.T, dir string, files map[string]string) { + t.Helper() + for name, content := range files { + path := filepath.Join(dir, name) + if err := os.MkdirAll(filepath.Dir(path), 0755); err != nil { + t.Fatal(err) + } + if err := os.WriteFile(path, []byte(content), 0644); err != nil { + t.Fatal(err) + } + } +} + +var ( + acceptanceLLGoOnce sync.Once + acceptanceLLGoBin string + acceptanceLLGoErr string +) + +// runLLGoInModule builds 
the llgo binary once per test process and runs it +// with the module directory as the working directory (llgo resolves +// packages relative to the cwd, and `go run` refuses directories outside +// its own module). +func runLLGoInModule(t *testing.T, dir string, args ...string) (string, error) { + t.Helper() + repoRoot := findStringConversionRepoRoot(t) + t.Setenv("LLGO_ROOT", repoRoot) + acceptanceLLGoOnce.Do(func() { + tmp, err := os.MkdirTemp("", "llgo-acceptance-bin") + if err != nil { + acceptanceLLGoErr = err.Error() + return + } + bin := filepath.Join(tmp, "llgo") + build := exec.Command("go", "build", "-o", bin, "./cmd/llgo") + build.Dir = repoRoot + if bout, berr := build.CombinedOutput(); berr != nil { + acceptanceLLGoErr = berr.Error() + "\n" + string(bout) + return + } + acceptanceLLGoBin = bin + }) + if acceptanceLLGoErr != "" { + t.Fatalf("building llgo failed: %s", acceptanceLLGoErr) + } + cmd := exec.Command(acceptanceLLGoBin, args...) + cmd.Dir = dir + out, err := cmd.CombinedOutput() + return string(out), err +}