diff --git a/cmd/symbolization/main.go b/cmd/symbolization/main.go new file mode 100644 index 0000000000..d47d3e7808 --- /dev/null +++ b/cmd/symbolization/main.go @@ -0,0 +1,108 @@ +package main + +import ( + "context" + "fmt" + "log" + + pprof "github.com/google/pprof/profile" + + "github.com/grafana/pyroscope/pkg/experiment/symbolizer" +) + +const ( + debuginfodBaseURL = "https://debuginfod.elfutils.org" + buildID = "2fa2055ef20fabc972d5751147e093275514b142" +) + +func main() { + client := symbolizer.NewDebuginfodClient(debuginfodBaseURL, nil) + + // Alternatively, use a local debug info file: + //client := &localDebuginfodClient{debugFilePath: "/path/to/your/debug/file"} + + s := symbolizer.NewSymbolizer(client, nil, nil) + ctx := context.Background() + + _, err := client.FetchDebuginfo(buildID) + if err != nil { + log.Fatalf("Failed to fetch debug info: %v", err) + } + //defer os.Remove(debugFilePath) + + // Create a request to symbolize specific addresses + req := symbolizer.Request{ + BuildID: buildID, + Mappings: []symbolizer.RequestMapping{ + { + Locations: []*symbolizer.Location{ + { + Address: 0x1500, + Mapping: &pprof.Mapping{}, + }, + { + Address: 0x3c5a, + Mapping: &pprof.Mapping{}, + }, + { + Address: 0x2745, + Mapping: &pprof.Mapping{}, + }, + }, + }, + }, + } + + if err := s.Symbolize(ctx, req); err != nil { + log.Fatalf("Failed to symbolize: %v", err) + } + + fmt.Println("Symbolization Results:") + fmt.Printf("Build ID: %s\n", buildID) + fmt.Println("----------------------------------------") + + for i, mapping := range req.Mappings { + fmt.Printf("Mapping #%d:\n", i+1) + for _, loc := range mapping.Locations { + fmt.Printf("\nAddress: 0x%x\n", loc.Address) + if len(loc.Lines) == 0 { + fmt.Println(" No symbolization information found") + continue + } + + for j, line := range loc.Lines { + fmt.Printf(" Line %d:\n", j+1) + if line.Function != nil { + fmt.Printf(" Function: %s\n", line.Function.Name) + fmt.Printf(" File: %s\n", line.Function.Filename) 
+ fmt.Printf(" Line: %d\n", line.Line) + fmt.Printf(" StartLine: %d\n", line.Function.StartLine) + } else { + fmt.Println(" No function information available") + } + } + fmt.Println("----------------------------------------") + } + } + + // Alternatively: Symbolize all addresses in the binary + // Note: Comment out the above specific symbolization when using this + // as it's a different approach meant for exploring all available symbols + //if err := symbolizer.SymbolizeAll(ctx, buildID); err != nil { + // log.Fatalf("Failed to symbolize all addresses: %v", err) + //} + + fmt.Println("\nSymbolization completed successfully.") +} + +// localDebuginfodClient provides a way to use local debug info files instead of fetching from a server +// +//nolint:all +type localDebuginfodClient struct { + debugFilePath string +} + +//nolint:all +func (c *localDebuginfodClient) FetchDebuginfo(buildID string) (string, error) { + return c.debugFilePath, nil +} diff --git a/go.mod b/go.mod index 9aa176ed9d..1f37dad4bf 100644 --- a/go.mod +++ b/go.mod @@ -19,6 +19,7 @@ require ( github.com/felixge/fgprof v0.9.4-0.20221116204635-ececf7638e93 github.com/felixge/httpsnoop v1.0.4 github.com/fsnotify/fsnotify v1.7.0 + github.com/go-delve/delve v1.23.1 github.com/go-kit/log v0.2.1 github.com/gogo/protobuf v1.3.2 github.com/gogo/status v1.1.1 diff --git a/go.sum b/go.sum index eaf480caa4..2354768b56 100644 --- a/go.sum +++ b/go.sum @@ -241,6 +241,8 @@ github.com/fsnotify/fsnotify v1.7.0 h1:8JEhPFa5W2WU7YfeZzPNqzMP6Lwt7L2715Ggo0nos github.com/fsnotify/fsnotify v1.7.0/go.mod h1:40Bi/Hjc2AVfZrqy+aj+yEI+/bRxZnMJyTJwOpGvigM= github.com/fullstorydev/emulators/storage v0.0.0-20240401123056-edc69752f474 h1:TufioMBjkJ6/Oqmlye/ReuxHFS35HyLmypj/BNy/8GY= github.com/fullstorydev/emulators/storage v0.0.0-20240401123056-edc69752f474/go.mod h1:PQwxF4UU8wuL+srGxr3BOhIW5zXqgucwVlO/nPZLsxw= +github.com/go-delve/delve v1.23.1 h1:MtZ13ppptttkqSuvVnwJ5CPhIAzDiOwRrYuCk3ES7fU= +github.com/go-delve/delve 
v1.23.1/go.mod h1:S3SLuEE2mn7wipKilTvk1p9HdTMnXXElcEpiZ+VcuqU= github.com/go-fonts/dejavu v0.3.4 h1:Qqyx9IOs5CQFxyWTdvddeWzrX0VNwUAvbmAzL0fpjbc= github.com/go-fonts/dejavu v0.3.4/go.mod h1:D1z0DglIz+lmpeNYMYlxW4r22IhcdOYnt+R3PShU/Kg= github.com/go-fonts/latin-modern v0.3.3 h1:g2xNgI8yzdNzIVm+qvbMryB6yGPe0pSMss8QT3QwlJ0= diff --git a/go.work.sum b/go.work.sum index 865979150f..bac225dc4a 100644 --- a/go.work.sum +++ b/go.work.sum @@ -628,13 +628,21 @@ github.com/containerd/log v0.1.0/go.mod h1:VRRf09a7mHDIRezVKTRCrOq78v577GXq3bSa3 github.com/coreos/go-oidc/v3 v3.5.0 h1:VxKtbccHZxs8juq7RdJntSqtXFtde9YpNpGn0yqgEHw= github.com/coreos/go-oidc/v3 v3.5.0/go.mod h1:ecXRtV4romGPeO6ieExAsUK9cb/3fp9hXNz1tlv8PIM= github.com/coreos/go-systemd/v22 v22.3.2/go.mod h1:Y58oyj3AT4RCenI/lSvhwexgC+NSVTIJ3seZv2GcEnc= +github.com/cosiner/argv v0.1.0 h1:BVDiEL32lwHukgJKP87btEPenzrrHUjajs/8yzaqcXg= +github.com/cosiner/argv v0.1.0/go.mod h1:EusR6TucWKX+zFgtdUsKT2Cvg45K5rtpCcWz4hK06d8= +github.com/cpuguy83/go-md2man/v2 v2.0.2 h1:p1EgwI/C7NhT0JmVkwCD2ZBK8j4aeHQX2pMHHBfMQ6w= +github.com/cpuguy83/go-md2man/v2 v2.0.2/go.mod h1:tgQtvFlXSQOSOSIRvRPT7W67SCa46tRHOmNcaadrF8o= github.com/creack/pty v1.1.9 h1:uDmaGzcdjhF4i/plgjmEsriH11Y0o7RKapEf/LDaM3w= github.com/creack/pty v1.1.11 h1:07n33Z8lZxZ2qwegKbObQohDhXDQxiMMz1NOUGYlesw= github.com/creack/pty v1.1.11/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E= +github.com/creack/pty v1.1.20 h1:VIPb/a2s17qNeQgDnkfZC35RScx+blkKF8GV68n80J4= +github.com/creack/pty v1.1.20/go.mod h1:MOBLtS5ELjhRRrroQr9kyvTxUAFNvYEK993ew/Vr4O4= github.com/cristalhq/hedgedhttp v0.9.1 h1:g68L9cf8uUyQKQJwciD0A1Vgbsz+QgCjuB1I8FAsCDs= github.com/cristalhq/hedgedhttp v0.9.1/go.mod h1:XkqWU6qVMutbhW68NnzjWrGtH8NUx1UfYqGYtHVKIsI= github.com/davecgh/go-xdr v0.0.0-20161123171359-e6a2ba005892 h1:qg9VbHo1TlL0KDM0vYvBG9EY0X0Yku5WYIPoFWt8f6o= github.com/davecgh/go-xdr v0.0.0-20161123171359-e6a2ba005892/go.mod h1:CTDl0pzVzE5DEzZhPfvhY/9sPFMQIxaJ9VAMs9AagrE= +github.com/derekparker/trie 
v0.0.0-20230829180723-39f4de51ef7d h1:hUWoLdw5kvo2xCsqlsIBMvWUc1QCSsCYD2J2+Fg6YoU= +github.com/derekparker/trie v0.0.0-20230829180723-39f4de51ef7d/go.mod h1:C7Es+DLenIpPc9J6IYw4jrK0h7S9bKj4DNl8+KxGEXU= github.com/dgryski/go-rendezvous v0.0.0-20200823014737-9f7001d12a5f h1:lO4WD4F/rVNCu3HqELle0jiPLLBs70cWOduZpkS1E78= github.com/dgryski/go-rendezvous v0.0.0-20200823014737-9f7001d12a5f/go.mod h1:cuUVRXasLTGF7a8hSLbxyZXjz+1KgoB3wDUb6vlszIc= github.com/digitalocean/godo v1.104.1/go.mod h1:VAI/L5YDzMuPRU01lEEUSQ/sp5Z//1HnnFv/RBTEdbg= @@ -658,6 +666,8 @@ github.com/franela/goreq v0.0.0-20171204163338-bcd34c9993f8/go.mod h1:ZhphrRTfi2 github.com/frankban/quicktest v1.14.3/go.mod h1:mgiwOwqx65TmIk1wJ6Q7wvnVMocbUorkibMOrVTHZps= github.com/ghodss/yaml v1.0.1-0.20190212211648-25d852aebe32 h1:Mn26/9ZMNWSw9C9ERFA1PUxfmGpolnw2v0bKOREu5ew= github.com/ghodss/yaml v1.0.1-0.20190212211648-25d852aebe32/go.mod h1:GIjDIg/heH5DOkXY3YJ/wNhfHsQHoXGjl8G8amsYQ1I= +github.com/go-delve/liner v1.2.3-0.20231231155935-4726ab1d7f62 h1:IGtvsNyIuRjl04XAOFGACozgUD7A82UffYxZt4DWbvA= +github.com/go-delve/liner v1.2.3-0.20231231155935-4726ab1d7f62/go.mod h1:biJCRbqp51wS+I92HMqn5H8/A0PAhxn2vyOT+JqhiGI= github.com/go-fonts/stix v0.2.2 h1:v9krocr13J1llaOHLEol1eaHsv8S43UuFX/1bFgEJJ4= github.com/go-fonts/stix v0.2.2/go.mod h1:SUxggC9dxd/Q+rb5PkJuvfvTbOPtNc2Qaua00fIp9iU= github.com/go-gl/glfw v0.0.0-20190409004039-e6da0acd62b1 h1:QbL/5oDUmRBzO9/Z7Seo6zf912W/a6Sr4Eu0G/3Jho0= @@ -739,6 +749,8 @@ github.com/google/btree v1.0.1/go.mod h1:xXMiIv4Fb/0kKde4SpL7qlzvu5cMJDRkFDxJfI9 github.com/google/flatbuffers v2.0.8+incompatible h1:ivUb1cGomAB101ZM1T0nOiWz9pSrTMoa9+EiY7igmkM= github.com/google/flatbuffers v2.0.8+incompatible/go.mod h1:1AeVuKshWv4vARoZatz6mlQ0JxURH0Kv5+zNeJKJCa8= github.com/google/go-cmp v0.5.9/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= +github.com/google/go-dap v0.12.0 h1:rVcjv3SyMIrpaOoTAdFDyHs99CwVOItIJGKLQFQhNeM= +github.com/google/go-dap v0.12.0/go.mod 
h1:tNjCASCm5cqePi/RVXXWEVqtnNLV1KTWtYOqu6rZNzc= github.com/google/go-pkcs11 v0.2.1-0.20230907215043-c6f79328ddf9 h1:OF1IPgv+F4NmqmJ98KTjdN97Vs1JxDPB3vbmYzV2dpk= github.com/google/go-pkcs11 v0.2.1-0.20230907215043-c6f79328ddf9/go.mod h1:6eQoGcuNJpa7jnd5pMGdkSaQpNDYvPlXWMcjXXThLlY= github.com/google/pprof v0.0.0-20230926050212-f7f687d19a98/go.mod h1:czg5+yv1E0ZGTi6S6vVK1mke0fV+FaUhNGcd6VRS9Ik= @@ -793,6 +805,8 @@ github.com/hudl/fargo v1.4.0/go.mod h1:9Ai6uvFy5fQNq6VPKtg+Ceq1+eTY4nKUlR2JElEOc github.com/iancoleman/strcase v0.2.0 h1:05I4QRnGpI0m37iZQRuskXh+w77mr6Z41lwQzuHLwW0= github.com/imdario/mergo v0.3.11/go.mod h1:jmQim1M+e3UYxmgPu/WyfjB3N3VflVyUjjjwH0dnCYA= github.com/inconshreveable/mousetrap v1.0.0 h1:Z8tu5sraLXCXIcARxBp/8cbvlwVa7Z1NHg9XEKhtSvM= +github.com/inconshreveable/mousetrap v1.1.0 h1:wN+x4NVGpMsO7ErUn/mUI3vEoE6Jt13X2s0bqwp9tc8= +github.com/inconshreveable/mousetrap v1.1.0/go.mod h1:vpF70FUmC8bwa3OWnCshd2FqLfsEA9PFc4w1p2J65bw= github.com/influxdata/influxdb1-client v0.0.0-20200827194710-b269163b24ab h1:HqW4xhhynfjrtEiiSGcQUd6vrK23iMam1FO8rI7mwig= github.com/influxdata/influxdb1-client v0.0.0-20200827194710-b269163b24ab/go.mod h1:qj24IKcXYK6Iy9ceXlo3Tc+vtHo9lIhSX5JddghvEPo= github.com/ionos-cloud/sdk-go/v6 v6.1.9/go.mod h1:EzEgRIDxBELvfoa/uBN0kOQaqovLjUWEB7iW4/Q+t4k= @@ -963,6 +977,8 @@ github.com/rs/cors v1.10.1 h1:L0uuZVXIKlI1SShY2nhFfo44TYvDPQ1w4oFkUJNfhyo= github.com/rs/cors v1.10.1/go.mod h1:XyqrcTp5zjWr1wsJ8PIRZssZ8b/WMcMf71DJnit4EMU= github.com/russross/blackfriday v1.6.0 h1:KqfZb0pUVN2lYqZUYRddxF4OR8ZMURnJIG5Y3VRLtww= github.com/russross/blackfriday v1.6.0/go.mod h1:ti0ldHuxg49ri4ksnFxlkCfN+hvslNlmVHqNRXXJNAY= +github.com/russross/blackfriday/v2 v2.1.0 h1:JIOH55/0cWyOuilr9/qlrm0BSXldqnqwMsf35Ld67mk= +github.com/russross/blackfriday/v2 v2.1.0/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM= github.com/ruudk/golang-pdf417 v0.0.0-20201230142125-a7e3863a1245 h1:K1Xf3bKttbF+koVGaX5xngRIZ5bVjbmPnaxE/dR08uY= github.com/ruudk/golang-pdf417 
v0.0.0-20201230142125-a7e3863a1245/go.mod h1:pQAZKsJ8yyVxGRWYNEm9oFB8ieLgKFnamEyDmSA0BRk= github.com/ryanuber/columnize v2.1.0+incompatible h1:j1Wcmh8OrK4Q7GXY+V7SVSY8nUWQxHW5TkBe7YUl+2s= @@ -993,6 +1009,8 @@ github.com/spf13/cast v1.3.1/go.mod h1:Qx5cxh0v+4UWYiBimWS+eyWzqEqokIECu5etghLkU github.com/spf13/cast v1.5.0 h1:rj3WzYc11XZaIZMPKmwP96zkFEnnAmV8s6XbB2aY32w= github.com/spf13/cast v1.5.0/go.mod h1:SpXXQ5YoyJw6s3/6cMTQuxvgRl3PCJiyaX9p6b155UU= github.com/spf13/cobra v0.0.3 h1:ZlrZ4XsMRm04Fr5pSFxBgfND2EBVa1nLpiy1stUsX/8= +github.com/spf13/cobra v1.7.0 h1:hyqWnYt1ZQShIddO5kBpj3vu05/++x6tJ6dg8EC572I= +github.com/spf13/cobra v1.7.0/go.mod h1:uLxZILRyS/50WlhOIKD7W6V5bgeIt+4sICxh6uRMrb0= github.com/spf13/jwalterweatherman v1.0.0 h1:XHEdyB+EcvlqZamSM4ZOMGlc93t6AcsBEu9Gc1vn7yk= github.com/spf13/jwalterweatherman v1.0.0/go.mod h1:cQK4TGJAtQXfYWX+Ddv3mKDzgVb68N+wFjFa4jdeBTo= github.com/spf13/jwalterweatherman v1.1.0 h1:ue6voC5bR5F8YxI5S67j9i582FU4Qvo2bmqnqMYADFk= @@ -1082,9 +1100,13 @@ go.opentelemetry.io/otel/trace v1.17.0/go.mod h1:I/4vKTgFclIsXRVucpH25X0mpFSczM7 go.opentelemetry.io/otel/trace v1.21.0/go.mod h1:LGbsEB0f9LGjN+OZaQQ26sohbOmiMR+BaslueVtS/qQ= go.opentelemetry.io/otel/trace v1.22.0/go.mod h1:RbbHXVqKES9QhzZq/fE5UnOSILqRt40a21sPw2He1xo= go.opentelemetry.io/proto/otlp v1.0.0/go.mod h1:Sy6pihPLfYHkr3NkUbEhGHFhINUSI/v80hjKIs5JXpM= +go.starlark.net v0.0.0-20231101134539-556fd59b42f6 h1:+eC0F/k4aBLC4szgOcjd7bDTEnpxADJyWJE0yowgM3E= +go.starlark.net v0.0.0-20231101134539-556fd59b42f6/go.mod h1:LcLNIzVOMp4oV+uusnpk+VU+SzXaJakUuBjoCSWH5dM= go.uber.org/atomic v1.10.0/go.mod h1:LUxbIzbOniOlMKjJjyPfpl4v+PKK2cNJn91OQbhoJI0= go.uber.org/goleak v1.2.1/go.mod h1:qlT2yGI9QafXHhZZLxlSuNsMw3FFLxBr+tBRlmO1xH4= go.uber.org/zap v1.17.0/go.mod h1:MXVU+bhUf/A7Xi2HNOnopQOrmycQ5Ih87HtOu4q5SSo= +golang.org/x/arch v0.6.0 h1:S0JTfE48HbRj80+4tbvZDYsJ3tGv6BUU3XxyZ7CirAc= +golang.org/x/arch v0.6.0/go.mod h1:FEVrYAQjsQXMVJ1nsMoVVXPZg6p2JE2mx8psSWTDQys= golang.org/x/crypto 
v0.0.0-20200414173820-0848c9571904/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto= golang.org/x/crypto v0.0.0-20200820211705-5c72a883971a/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto= golang.org/x/crypto v0.0.0-20220411220226-7b82a4e95df4/go.mod h1:IxCIyHEi3zRg3s0A5j5BB6A9Jmi73HwBIUl50j+osU4= diff --git a/pkg/experiment/query_backend/backend.go b/pkg/experiment/query_backend/backend.go index e8e7a9f43c..30099b3b1e 100644 --- a/pkg/experiment/query_backend/backend.go +++ b/pkg/experiment/query_backend/backend.go @@ -4,27 +4,31 @@ import ( "context" "flag" "fmt" - "github.com/go-kit/log" "github.com/grafana/dskit/grpcclient" "github.com/grafana/dskit/services" + "github.com/grafana/pyroscope/pkg/objstore/client" "github.com/opentracing/opentracing-go" "github.com/prometheus/client_golang/prometheus" "golang.org/x/sync/errgroup" metastorev1 "github.com/grafana/pyroscope/api/gen/proto/go/metastore/v1" queryv1 "github.com/grafana/pyroscope/api/gen/proto/go/query/v1" + "github.com/grafana/pyroscope/pkg/experiment/symbolizer" "github.com/grafana/pyroscope/pkg/util" ) type Config struct { Address string `yaml:"address"` GRPCClientConfig grpcclient.Config `yaml:"grpc_client_config" doc:"description=Configures the gRPC client used to communicate between the query-frontends and the query-schedulers."` + Symbolizer symbolizer.Config `yaml:"symbolizer"` + DebugStorage client.Config `yaml:"debug_storage"` } func (cfg *Config) RegisterFlags(f *flag.FlagSet) { f.StringVar(&cfg.Address, "query-backend.address", "localhost:9095", "") cfg.GRPCClientConfig.RegisterFlagsWithPrefix("query-backend.grpc-client-config", f) + cfg.Symbolizer.RegisterFlagsWithPrefix("query-backend.symbolizer", f) } func (cfg *Config) Validate() error { @@ -48,6 +52,8 @@ type QueryBackend struct { backendClient QueryHandler blockReader QueryHandler + + symbolizer *symbolizer.Symbolizer } func New( @@ -57,13 +63,29 @@ func New( backendClient QueryHandler, blockReader QueryHandler, ) 
(*QueryBackend, error) { + var sym *symbolizer.Symbolizer + if config.Symbolizer.DebuginfodURL != "" { + var err error + sym, err = symbolizer.NewFromConfig(context.Background(), config.Symbolizer, reg) + if err != nil { + return nil, fmt.Errorf("create symbolizer: %w", err) + } + } + q := QueryBackend{ config: config, logger: logger, reg: reg, backendClient: backendClient, blockReader: blockReader, + symbolizer: sym, + } + + // Pass symbolizer to BlockReader if it's the right type + if br, ok := blockReader.(*BlockReader); ok { + br.symbolizer = sym } + q.service = services.NewIdleService(q.starting, q.stopping) return &q, nil } diff --git a/pkg/experiment/query_backend/block_reader.go b/pkg/experiment/query_backend/block_reader.go index 059038ecf9..9d67567461 100644 --- a/pkg/experiment/query_backend/block_reader.go +++ b/pkg/experiment/query_backend/block_reader.go @@ -16,6 +16,7 @@ import ( queryv1 "github.com/grafana/pyroscope/api/gen/proto/go/query/v1" "github.com/grafana/pyroscope/pkg/experiment/block" + "github.com/grafana/pyroscope/pkg/experiment/symbolizer" "github.com/grafana/pyroscope/pkg/objstore" "github.com/grafana/pyroscope/pkg/util" ) @@ -48,6 +49,8 @@ type BlockReader struct { log log.Logger storage objstore.Bucket + symbolizer *symbolizer.Symbolizer + // TODO: // - Use a worker pool instead of the errgroup. // - Reusable query context. 
@@ -83,7 +86,7 @@ func (b *BlockReader) Invoke( object := block.NewObject(b.storage, md) for _, ds := range md.Datasets { dataset := block.NewDataset(ds, object) - qcs = append(qcs, newQueryContext(ctx, b.log, r, agg, dataset)) + qcs = append(qcs, newQueryContext(ctx, b.log, r, agg, dataset, b.symbolizer)) } } diff --git a/pkg/experiment/query_backend/query.go b/pkg/experiment/query_backend/query.go index 1243e9f31e..7618fdd944 100644 --- a/pkg/experiment/query_backend/query.go +++ b/pkg/experiment/query_backend/query.go @@ -11,6 +11,7 @@ import ( queryv1 "github.com/grafana/pyroscope/api/gen/proto/go/query/v1" "github.com/grafana/pyroscope/pkg/experiment/block" + "github.com/grafana/pyroscope/pkg/experiment/symbolizer" ) // TODO(kolesnikovae): We have a procedural definition of our queries, @@ -71,12 +72,13 @@ func registerQueryType( } type queryContext struct { - ctx context.Context - log log.Logger - req *request - agg *reportAggregator - ds *block.Dataset - err error + ctx context.Context + log log.Logger + req *request + agg *reportAggregator + ds *block.Dataset + err error + symbolizer *symbolizer.Symbolizer } func newQueryContext( @@ -85,13 +87,15 @@ func newQueryContext( req *request, agg *reportAggregator, ds *block.Dataset, + symbolizer *symbolizer.Symbolizer, ) *queryContext { return &queryContext{ - ctx: ctx, - log: log, - req: req, - agg: agg, - ds: ds, + ctx: ctx, + log: log, + req: req, + agg: agg, + ds: ds, + symbolizer: symbolizer, } } diff --git a/pkg/experiment/query_backend/query_tree.go b/pkg/experiment/query_backend/query_tree.go index 21ad773290..0de88417bf 100644 --- a/pkg/experiment/query_backend/query_tree.go +++ b/pkg/experiment/query_backend/query_tree.go @@ -56,7 +56,9 @@ func queryTree(q *queryContext, query *queryv1.Query) (*queryv1.Report, error) { defer runutil.CloseWithErrCapture(&err, profiles, "failed to close profile stream") resolver := symdb.NewResolver(q.ctx, q.ds.Symbols(), - 
symdb.WithResolverMaxNodes(query.Tree.GetMaxNodes())) + symdb.WithResolverMaxNodes(query.Tree.GetMaxNodes()), + symdb.WithSymbolizer(q.symbolizer)) + defer resolver.Release() if len(spanSelector) > 0 { diff --git a/pkg/experiment/symbolizer/addrmapper.go b/pkg/experiment/symbolizer/addrmapper.go new file mode 100644 index 0000000000..1e4e7f9637 --- /dev/null +++ b/pkg/experiment/symbolizer/addrmapper.go @@ -0,0 +1,190 @@ +package symbolizer + +import ( + "debug/elf" + "fmt" +) + +// BinaryLayout contains the information needed to translate between runtime addresses +// and the addresses in the debug information. This is necessary because: +// 1. Executables (ET_EXEC) use fixed addresses but may need segment offset adjustments +// 2. Shared libraries (ET_DYN) can be loaded at any address, requiring base address calculations +// 3. Relocatable files (ET_REL) need special handling for their relocations +type BinaryLayout struct { + ElfType uint16 + ProgramHeaders []MemoryRegion +} + +// MemoryRegion represents a loadable segment in the ELF file. 
+// These segments define how the program should be loaded into memory: +// - Off: where the segment data starts in the file +// - Vaddr: the virtual address where the segment should be loaded +// - Memsz: how much memory the segment occupies when loaded +type MemoryRegion struct { + Off uint64 // File offset + Vaddr uint64 // Virtual address + Filesz uint64 // Size in file + Memsz uint64 // Size in memory (may be larger than Filesz due to .bss) + Type uint32 +} + +func ExecutableInfoFromELF(f *elf.File) (*BinaryLayout, error) { + loadableSegments := make([]MemoryRegion, 0, len(f.Progs)) + for _, segment := range f.Progs { + if segment.Type == elf.PT_LOAD { + loadableSegments = append(loadableSegments, MemoryRegion{ + Off: segment.Off, + Vaddr: segment.Vaddr, + Filesz: segment.Filesz, + Memsz: segment.Memsz, + Type: uint32(segment.Type), + }) + } + } + + return &BinaryLayout{ + ElfType: uint16(f.Type), + ProgramHeaders: loadableSegments, + }, nil +} + +// MapRuntimeAddress translates a runtime address to its corresponding address +// in the debug information. This translation is necessary because: +// - The program might be loaded at a different address than it was linked for +// - Different segments might need different adjustments +// - Various ELF types (EXEC, DYN, REL) handle addressing differently +func MapRuntimeAddress(runtimeAddr uint64, ei *BinaryLayout, m Mapping) (uint64, error) { + baseOffset, err := CalculateBase(ei, m, runtimeAddr) + if err != nil { + return runtimeAddr, fmt.Errorf("calculate base offset: %w", err) + } + + return runtimeAddr - baseOffset, nil +} + +// CalculateBase determines the base address adjustment needed for address translation. 
+// The calculation varies depending on the ELF type: +// - ET_EXEC: Uses fixed addresses with potential segment adjustments +// - ET_DYN: Can be loaded anywhere, needs runtime base address adjustment +// - ET_REL: Requires relocation processing +func CalculateBase(ei *BinaryLayout, m Mapping, addr uint64) (uint64, error) { + segment, err := ei.FindProgramHeader(m, addr) + if err != nil { + return 0, fmt.Errorf("find program segment: %w", err) + } + + if segment == nil { + return 0, nil + } + + // Handle special case where mapping spans entire address space + if m.Start == 0 && m.Offset == 0 && (m.Limit == ^uint64(0) || m.Limit == 0) { + return 0, nil + } + + switch elf.Type(ei.ElfType) { + case elf.ET_EXEC: + return calculateExecBase(m, segment) + case elf.ET_REL: + return calculateRelocatableBase(m) + case elf.ET_DYN: + return calculateDynamicBase(m, segment) + } + + return 0, fmt.Errorf("unsupported ELF type: %v", elf.Type(ei.ElfType)) +} + +// FindProgramHeader finds the program header containing the given address. +// It returns nil if no header is found. 
+func (ei *BinaryLayout) FindProgramHeader(m Mapping, addr uint64) (*MemoryRegion, error) { + // Special case: if mapping is empty (all zeros), just look for any header containing the address + if m.Start == 0 && m.Limit == 0 { + for i := range ei.ProgramHeaders { + h := &ei.ProgramHeaders[i] + if h.Type == uint32(elf.PT_LOAD) { + if h.Vaddr <= addr && addr < h.Vaddr+h.Memsz { + return h, nil + } + } + } + return nil, nil + } + + // Fast path: if address is invalid or outside reasonable range + if m.Start >= m.Limit { + return nil, fmt.Errorf("invalid mapping range: start %x >= limit %x", m.Start, m.Limit) + } + + // Special case: kernel addresses or very high addresses + if m.Limit >= (1 << 63) { + return nil, nil + } + + // No loadable segments + if len(ei.ProgramHeaders) == 0 { + return nil, nil + } + + // Calculate file offset from the address + fileOffset := addr - m.Start + m.Offset + + // Find all headers that could contain this address + var candidateHeaders []*MemoryRegion + for i := range ei.ProgramHeaders { + h := &ei.ProgramHeaders[i] + if h.Type != uint32(elf.PT_LOAD) { + continue + } + + // Check if the file offset falls within this segment + if fileOffset >= h.Off && fileOffset < h.Off+h.Memsz { + candidateHeaders = append(candidateHeaders, h) + } + } + + // No matching headers found + if len(candidateHeaders) == 0 { + return nil, nil + } + + // If only one header matches, return it + if len(candidateHeaders) == 1 { + return candidateHeaders[0], nil + } + + // Multiple headers - need to select the most appropriate one + // Choose the one with the closest starting address to our target + var bestHeader *MemoryRegion + bestDistance := uint64(^uint64(0)) // Max uint64 as initial distance + + for _, h := range candidateHeaders { + distance := addr - h.Vaddr + if distance < bestDistance { + bestDistance = distance + bestHeader = h + } + } + + return bestHeader, nil +} + +func calculateExecBase(m Mapping, h *MemoryRegion) (uint64, error) { + if h == nil { 
+ return 0, nil + } + return m.Start - m.Offset + h.Off - h.Vaddr, nil +} + +func calculateRelocatableBase(m Mapping) (uint64, error) { + if m.Offset != 0 { + return 0, fmt.Errorf("relocatable files with non-zero offset not supported") + } + return m.Start, nil +} + +func calculateDynamicBase(m Mapping, h *MemoryRegion) (uint64, error) { + if h == nil { + return m.Start - m.Offset, nil + } + return m.Start - m.Offset + h.Off - h.Vaddr, nil +} diff --git a/pkg/experiment/symbolizer/cache.go b/pkg/experiment/symbolizer/cache.go new file mode 100644 index 0000000000..82de47fef6 --- /dev/null +++ b/pkg/experiment/symbolizer/cache.go @@ -0,0 +1,115 @@ +package symbolizer + +import ( + "context" + "fmt" + "io" + "time" + + "github.com/grafana/pyroscope/pkg/objstore" +) + +// CacheConfig holds configuration for the debug info cache +type CacheConfig struct { + Enabled bool `yaml:"enabled"` + MaxAge time.Duration `yaml:"max_age"` +} + +func NewObjstoreCache(bucket objstore.Bucket, maxAge time.Duration, metrics *Metrics) *ObjstoreCache { + return &ObjstoreCache{ + bucket: bucket, + maxAge: maxAge, + metrics: metrics, + } +} + +// DebugInfoCache handles caching of debug info files +type DebugInfoCache interface { + Get(ctx context.Context, buildID string) (io.ReadCloser, error) + Put(ctx context.Context, buildID string, reader io.Reader) error +} + +// ObjstoreCache implements DebugInfoCache using S3 storage +type ObjstoreCache struct { + bucket objstore.Bucket + maxAge time.Duration + metrics *Metrics +} + +func (c *ObjstoreCache) Get(ctx context.Context, buildID string) (io.ReadCloser, error) { + c.metrics.cacheRequestsTotal.WithLabelValues("get").Inc() + start := time.Now() + defer func() { + c.metrics.cacheOperationDuration.WithLabelValues("get").Observe(time.Since(start).Seconds()) + }() + + // First check if object exists to avoid unnecessary operations + reader, err := c.bucket.Get(ctx, buildID) + if err != nil { + if c.bucket.IsObjNotFoundErr(err) { + 
c.metrics.cacheMissesTotal.Inc() + return nil, err + } + c.metrics.cacheRequestErrorsTotal.WithLabelValues("get", "read_error").Inc() + return nil, fmt.Errorf("get from cache: %w", err) + } + + // Get attributes - this should use the same HEAD request that Get used + attrs, err := c.bucket.Attributes(ctx, buildID) + if err != nil { + reader.Close() + c.metrics.cacheRequestErrorsTotal.WithLabelValues("get", "attribute_error").Inc() + return nil, fmt.Errorf("get cache attributes: %w", err) + } + + // Check if expired + if time.Since(attrs.LastModified) > c.maxAge { + reader.Close() + c.metrics.cacheExpiredTotal.Inc() + + // Async deletion to not block the request + go func() { + delCtx, cancel := context.WithTimeout(context.Background(), 10*time.Second) + defer cancel() + if err = c.bucket.Delete(delCtx, buildID); err != nil { + c.metrics.cacheRequestErrorsTotal.WithLabelValues("delete", "delete_error").Inc() + } + }() + return nil, fmt.Errorf("cached object expired") + } + + c.metrics.cacheHitsTotal.Inc() + return reader, nil +} + +func (c *ObjstoreCache) Put(ctx context.Context, buildID string, reader io.Reader) error { + c.metrics.cacheRequestsTotal.WithLabelValues("put").Inc() + start := time.Now() + defer func() { + c.metrics.cacheOperationDuration.WithLabelValues("put").Observe(time.Since(start).Seconds()) + }() + + if err := c.bucket.Upload(ctx, buildID, reader); err != nil { + c.metrics.cacheRequestErrorsTotal.WithLabelValues("put", "upload_error").Inc() + return fmt.Errorf("upload to cache: %w", err) + } + + return nil +} + +// NullCache implements DebugInfoCache but performs no caching +type NullCache struct{} + +func NewNullCache() DebugInfoCache { + return &NullCache{} +} + +func (n *NullCache) Get(ctx context.Context, buildID string) (io.ReadCloser, error) { + // Always return cache miss + return nil, fmt.Errorf("cache miss") +} + +func (n *NullCache) Put(ctx context.Context, buildID string, reader io.Reader) error { + // Do nothing + return nil +} diff 
--git a/pkg/experiment/symbolizer/debuginfod_client.go b/pkg/experiment/symbolizer/debuginfod_client.go new file mode 100644 index 0000000000..dfbcd11739 --- /dev/null +++ b/pkg/experiment/symbolizer/debuginfod_client.go @@ -0,0 +1,93 @@ +package symbolizer + +import ( + "fmt" + "io" + "net/http" + "os" + "path/filepath" + "regexp" + "time" +) + +type DebuginfodClient interface { + FetchDebuginfo(buildID string) (string, error) +} + +type debuginfodClient struct { + baseURL string + metrics *Metrics +} + +func NewDebuginfodClient(baseURL string, metrics *Metrics) DebuginfodClient { + return &debuginfodClient{ + baseURL: baseURL, + metrics: metrics, + } +} + +// FetchDebuginfo fetches the debuginfo file for a specific build ID. +func (c *debuginfodClient) FetchDebuginfo(buildID string) (string, error) { + c.metrics.debuginfodRequestsTotal.Inc() + start := time.Now() + + sanitizedBuildID, err := sanitizeBuildID(buildID) + if err != nil { + c.metrics.debuginfodRequestErrorsTotal.WithLabelValues("invalid_id").Inc() + return "", err + } + + url := fmt.Sprintf("%s/buildid/%s/debuginfo", c.baseURL, sanitizedBuildID) + + resp, err := http.Get(url) + if err != nil { + c.metrics.debuginfodRequestErrorsTotal.WithLabelValues("http").Inc() + c.metrics.debuginfodRequestDuration.WithLabelValues("error").Observe(time.Since(start).Seconds()) + return "", fmt.Errorf("failed to fetch debuginfod: %w", err) + } + defer resp.Body.Close() + + if resp.StatusCode != http.StatusOK { + c.metrics.debuginfodRequestErrorsTotal.WithLabelValues("http").Inc() + c.metrics.debuginfodRequestDuration.WithLabelValues("error").Observe(time.Since(start).Seconds()) + return "", fmt.Errorf("unexpected HTTP status: %s", resp.Status) + } + + // Record file size from Content-Length if available + if contentLength := resp.ContentLength; contentLength > 0 { + c.metrics.debuginfodFileSize.Observe(float64(contentLength)) + } + + // TODO: Avoid file operations and handle debuginfo in memory. 
+ // Save the debuginfo to a temporary file + tempDir := os.TempDir() + filePath := filepath.Join(tempDir, fmt.Sprintf("%s.elf", sanitizedBuildID)) + outFile, err := os.Create(filePath) + if err != nil { + c.metrics.debuginfodRequestErrorsTotal.WithLabelValues("file_create").Inc() + c.metrics.debuginfodRequestDuration.WithLabelValues("error").Observe(time.Since(start).Seconds()) + return "", fmt.Errorf("failed to create temp file: %w", err) + } + defer outFile.Close() + + _, err = io.Copy(outFile, resp.Body) + if err != nil { + c.metrics.debuginfodRequestErrorsTotal.WithLabelValues("write").Inc() + c.metrics.debuginfodRequestDuration.WithLabelValues("error").Observe(time.Since(start).Seconds()) + return "", fmt.Errorf("failed to write debuginfod to file: %w", err) + } + + c.metrics.debuginfodRequestDuration.WithLabelValues("success").Observe(time.Since(start).Seconds()) + + return filePath, nil +} + +// sanitizeBuildID ensures that the buildID is a safe and valid string for use in file paths. +func sanitizeBuildID(buildID string) (string, error) { + // Allow only alphanumeric characters, dashes, and underscores. 
+ validBuildID := regexp.MustCompile(`^[a-zA-Z0-9_-]+$`) + if !validBuildID.MatchString(buildID) { + return "", fmt.Errorf("invalid build ID: %s", buildID) + } + return buildID, nil +} diff --git a/pkg/experiment/symbolizer/dwarf.go b/pkg/experiment/symbolizer/dwarf.go new file mode 100644 index 0000000000..2274163c5d --- /dev/null +++ b/pkg/experiment/symbolizer/dwarf.go @@ -0,0 +1,373 @@ +package symbolizer + +import ( + "context" + "debug/dwarf" + "errors" + "fmt" + "io" + "sort" + + "github.com/go-delve/delve/pkg/dwarf/godwarf" + "github.com/go-delve/delve/pkg/dwarf/reader" + pprof "github.com/google/pprof/profile" +) + +// DWARFInfo implements the liner interface +type DWARFInfo struct { + debugData *dwarf.Data + lineEntries map[dwarf.Offset][]dwarf.LineEntry + subprograms map[dwarf.Offset][]*godwarf.Tree + abstractSubprograms map[dwarf.Offset]*dwarf.Entry +} + +// NewDWARFInfo creates a new liner using DWARF debug info +func NewDWARFInfo(debugData *dwarf.Data) *DWARFInfo { + return &DWARFInfo{ + debugData: debugData, + lineEntries: make(map[dwarf.Offset][]dwarf.LineEntry), + subprograms: make(map[dwarf.Offset][]*godwarf.Tree), + abstractSubprograms: make(map[dwarf.Offset]*dwarf.Entry), + } +} + +func (d *DWARFInfo) ResolveAddress(_ context.Context, addr uint64) ([]SymbolLocation, error) { + er := reader.New(d.debugData) + cu, err := er.SeekPC(addr) + if err != nil { + return nil, fmt.Errorf("no symbol information found for address 0x%x", addr) + } + if cu == nil { + return nil, errors.New("no symbol information found for address") + } + + if err := d.buildLookupTables(cu); err != nil { + return nil, err + } + + var lines []SymbolLocation + var targetTree *godwarf.Tree + for _, tree := range d.subprograms[cu.Offset] { + if tree.ContainsPC(addr) { + targetTree = tree + break + } + } + + if targetTree == nil { + return lines, nil + } + + functionName, ok := targetTree.Entry.Val(dwarf.AttrName).(string) + if !ok { + functionName = "" + } + + declLine, ok := 
targetTree.Entry.Val(dwarf.AttrDeclLine).(int64) + if !ok { + declLine = 0 + } + + file, line := d.findLineInfo(d.lineEntries[cu.Offset], targetTree.Ranges) + lines = append(lines, SymbolLocation{ + Function: &pprof.Function{ + Name: functionName, + Filename: file, + StartLine: declLine, + }, + Line: line, + }) + + // Enhanced inline function processing + for _, tr := range reader.InlineStack(targetTree, addr) { + + var functionName string + if tr.Tag == dwarf.TagSubprogram { + functionName, ok = targetTree.Entry.Val(dwarf.AttrName).(string) + if !ok { + functionName = "" + } + } else { + if abstractOffset, ok := tr.Entry.Val(dwarf.AttrAbstractOrigin).(dwarf.Offset); ok { + if abstractOrigin, exists := d.abstractSubprograms[abstractOffset]; exists { + functionName = d.getFunctionName(abstractOrigin) + } else { + functionName = "?" + } + } else { + functionName = "?" + } + } + + declLine, ok := tr.Entry.Val(dwarf.AttrDeclLine).(int64) + if !ok { + declLine = 0 + } + + file, line := d.findLineInfo(d.lineEntries[cu.Offset], tr.Ranges) + + lines = append(lines, SymbolLocation{ + Function: &pprof.Function{ + Name: functionName, + Filename: file, + StartLine: declLine, + }, + Line: line, + }) + } + + return lines, nil +} + +func (d *DWARFInfo) resolveFunctionName(entry *dwarf.Entry) string { + if entry == nil { + return "?" + } + + if name, ok := entry.Val(dwarf.AttrName).(string); ok { + return name + } + if name, ok := entry.Val(dwarf.AttrLinkageName).(string); ok { + return name + } + + return "?" +} + +func (d *DWARFInfo) buildLookupTables(cu *dwarf.Entry) error { + // Check if we already processed this compilation unit + if _, exists := d.lineEntries[cu.Offset]; exists { + return nil + } + + // TODO: not 100% sure about it. Review it. + // Scan all DWARF entries for abstract subprograms before processing this compilation unit. + // This scan is necessary because DWARF debug info can contain cross-compilation unit + // references, particularly for inlined functions. 
When a function is inlined, its + // definition (the abstract entry) may be in one compilation unit while its usage + // (via AttrAbstractOrigin) can be in another. By scanning all entries upfront, + // we ensure we can resolve these cross-unit references when they occur. + // + // For example, when a C++ standard library function is inlined (like printf from stdio.h), + // its abstract entry might be in the compilation unit for stdio.h, but we need to + // resolve its name when we find it inlined in our program's compilation unit. + if len(d.abstractSubprograms) == 0 { + if err := d.scanAbstractSubprograms(); err != nil { + return fmt.Errorf("scan abstract subprograms: %w", err) + } + } + + // Process line entries first + if err := d.processLineEntries(cu); err != nil { + return fmt.Errorf("process line entries: %w", err) + } + + // Process subprograms and their trees + if err := d.processSubprogramEntries(cu); err != nil { + return fmt.Errorf("process subprogram entries: %w", err) + } + + return nil +} + +func (d *DWARFInfo) processLineEntries(cu *dwarf.Entry) error { + lr, err := d.debugData.LineReader(cu) + if err != nil { + return fmt.Errorf("create line reader: %w", err) + } + if lr == nil { + return errors.New("no line reader available") + } + + entries := make([]dwarf.LineEntry, 0) + for { + var entry dwarf.LineEntry + err := lr.Next(&entry) + if err != nil { + if err == io.EOF { + break + } + return fmt.Errorf("read line entry: %w", err) + } + + // Only store statement entries + if entry.IsStmt { + entries = append(entries, entry) + } + } + + d.lineEntries[cu.Offset] = entries + return nil +} + +func (d *DWARFInfo) processSubprogramEntries(cu *dwarf.Entry) error { + reader := d.debugData.Reader() + reader.Seek(cu.Offset) + + entry, err := reader.Next() + if err != nil { + return fmt.Errorf("read initial entry: %w", err) + } + if entry == nil || entry.Tag != dwarf.TagCompileUnit { + return fmt.Errorf("unexpected entry type at CU offset: %v", cu.Offset) + } 
+ + subprograms := make([]*godwarf.Tree, 0) + for { + entry, err := reader.Next() + if err != nil { + if err == io.EOF { + break + } + return fmt.Errorf("read entry: %w", err) + } + if entry == nil || entry.Tag == dwarf.TagCompileUnit { + break + } + + if entry.Tag != dwarf.TagSubprogram { + continue + } + + // Check for abstract entries first + isAbstract := false + for _, field := range entry.Field { + if field.Attr == dwarf.AttrInline { + d.abstractSubprograms[entry.Offset] = entry + isAbstract = true + break + } + } + + //Skip if this was an abstract entry + if isAbstract { + continue + } + + // Extract the subprogram tree + tree, err := godwarf.LoadTree(entry.Offset, d.debugData, 0) + if err != nil { + return fmt.Errorf("load subprogram tree: %w", err) + } + + subprograms = append(subprograms, tree) + } + + d.subprograms[cu.Offset] = subprograms + return nil +} + +func (d *DWARFInfo) findLineInfo(entries []dwarf.LineEntry, ranges [][2]uint64) (string, int64) { + sort.Slice(entries, func(i, j int) bool { + return entries[i].Address < entries[j].Address + }) + + // Try to find an entry that contains our target address + targetAddr := ranges[0][0] + for _, entry := range entries { + if entry.Address >= targetAddr && entry.Address < ranges[0][1] { + if entry.File != nil { + return entry.File.Name, int64(entry.Line) + } + } + } + + // Find the closest entry before our target address + var lastEntry *dwarf.LineEntry + for i := range entries { + if entries[i].Address > targetAddr { + break + } + lastEntry = &entries[i] + } + + if lastEntry != nil && lastEntry.File != nil { + return lastEntry.File.Name, int64(lastEntry.Line) + } + + return "?", 0 +} + +func (d *DWARFInfo) getFunctionName(entry *dwarf.Entry) string { + name := "?" + ok := false + if entry != nil { + for _, field := range entry.Field { + if field.Attr == dwarf.AttrName { + name, ok = field.Val.(string) + if !ok { + name = "?" 
+ } + } + } + } + return name +} + +func (d *DWARFInfo) SymbolizeAllAddresses() map[uint64][]SymbolLocation { + results := make(map[uint64][]SymbolLocation) + + // Get all compilation units + reader := d.debugData.Reader() + for { + entry, err := reader.Next() + if err != nil || entry == nil { + break + } + + if entry.Tag != dwarf.TagCompileUnit { + continue + } + + // Get ranges for this compilation unit + ranges, err := d.debugData.Ranges(entry) + if err != nil { + fmt.Printf("Warning: Failed to get ranges for CU: %v\n", err) + continue + } + + for _, rng := range ranges { + // Skip invalid ranges + if rng[0] >= rng[1] { + continue + } + + // Sample multiple points in this range + addresses := []uint64{ + rng[0], // start + rng[0] + (rng[1]-rng[0])/2, // middle + rng[1] - 1, // end (exclusive) + } + + for _, addr := range addresses { + lines, err := d.ResolveAddress(context.Background(), addr) + if err != nil { + continue + } + + if len(lines) > 0 { + results[addr] = lines + } + } + } + } + + return results +} + +func (d *DWARFInfo) scanAbstractSubprograms() error { + reader := d.debugData.Reader() + // Scan from the start, don't stop at first CU + for { + entry, err := reader.Next() + if err != nil || entry == nil { + break + } + + if entry.Tag == dwarf.TagSubprogram { + // Store ALL subprograms, not just inline ones + d.abstractSubprograms[entry.Offset] = entry + } + } + return nil +} diff --git a/pkg/experiment/symbolizer/metrics.go b/pkg/experiment/symbolizer/metrics.go new file mode 100644 index 0000000000..23ee7b74dc --- /dev/null +++ b/pkg/experiment/symbolizer/metrics.go @@ -0,0 +1,160 @@ +package symbolizer + +import "github.com/prometheus/client_golang/prometheus" + +type Metrics struct { + registerer prometheus.Registerer + + // Debuginfod metrics + debuginfodRequestDuration *prometheus.HistogramVec + debuginfodFileSize prometheus.Histogram + debuginfodRequestsTotal prometheus.Counter + debuginfodRequestErrorsTotal *prometheus.CounterVec + + // Cache 
metrics + cacheRequestsTotal *prometheus.CounterVec + cacheRequestErrorsTotal *prometheus.CounterVec + cacheHitsTotal prometheus.Counter + cacheMissesTotal prometheus.Counter + cacheOperationDuration *prometheus.HistogramVec + cacheExpiredTotal prometheus.Counter + + // Symbolization metrics + //symbolizationDuration prometheus.Histogram + //symbolizationLocations *prometheus.CounterVec + symbolizationRequestsTotal prometheus.Counter + symbolizationRequestErrorsTotal *prometheus.CounterVec + symbolizationDuration prometheus.Histogram + symbolizationLocationTotal *prometheus.CounterVec +} + +func NewMetrics(reg prometheus.Registerer) *Metrics { + m := &Metrics{ + registerer: reg, + debuginfodRequestDuration: prometheus.NewHistogramVec(prometheus.HistogramOpts{ + Name: "pyroscope_symbolizer_debuginfod_request_duration_seconds", + Help: "Time spent performing debuginfod requests", + Buckets: []float64{0.1, 0.5, 1, 5, 10, 30, 60, 120, 300}, + }, []string{"status"}, + ), + debuginfodFileSize: prometheus.NewHistogram( + prometheus.HistogramOpts{ + Name: "pyroscope_symbolizer_debuginfo_file_size_bytes", + Help: "Size of debug info files fetched from debuginfod", + // 1MB to 4GB + Buckets: prometheus.ExponentialBuckets(1024*1024, 2, 12), + }, + ), + debuginfodRequestsTotal: prometheus.NewCounter(prometheus.CounterOpts{ + Name: "pyroscope_symbolizer_debuginfod_requests_total", + Help: "Total number of debuginfod requests attempted", + }), + debuginfodRequestErrorsTotal: prometheus.NewCounterVec(prometheus.CounterOpts{ + Name: "pyroscope_symbolizer_debuginfod_request_errors_total", + Help: "Total number of debuginfod request errors", + }, []string{"reason"}), + cacheRequestsTotal: prometheus.NewCounterVec(prometheus.CounterOpts{ + Name: "pyroscope_symbolizer_cache_requests_total", + Help: "Total number of cache requests", + }, []string{"operation"}), + cacheRequestErrorsTotal: prometheus.NewCounterVec(prometheus.CounterOpts{ + Name: 
"pyroscope_symbolizer_cache_request_errors_total", + Help: "Total number of cache request errors", + }, []string{"operation", "reason"}), // get/put, and specific error reasons + cacheHitsTotal: prometheus.NewCounter(prometheus.CounterOpts{ + Name: "pyroscope_symbolizer_cache_hits_total", + Help: "Total number of cache hits", + }), + cacheMissesTotal: prometheus.NewCounter(prometheus.CounterOpts{ + Name: "pyroscope_symbolizer_cache_misses_total", + Help: "Total number of cache misses", + }), + cacheOperationDuration: prometheus.NewHistogramVec( + prometheus.HistogramOpts{ + Name: "pyroscope_symbolizer_cache_operation_duration_seconds", + Help: "Time spent performing cache operations", + Buckets: []float64{.01, .05, .1, .5, 1, 5, 10, 30, 60}, + }, + []string{"operation"}, + ), + cacheExpiredTotal: prometheus.NewCounter(prometheus.CounterOpts{ + Name: "pyroscope_symbolizer_cache_expired_total", + Help: "Total number of expired items removed from cache", + }), + symbolizationRequestsTotal: prometheus.NewCounter(prometheus.CounterOpts{ + Name: "pyroscope_symbolizer_requests_total", + Help: "Total number of symbolization requests", + }), + symbolizationRequestErrorsTotal: prometheus.NewCounterVec(prometheus.CounterOpts{ + Name: "pyroscope_symbolizer_request_errors_total", + Help: "Total number of symbolization errors", + }, []string{"reason"}), + symbolizationDuration: prometheus.NewHistogram( + prometheus.HistogramOpts{ + Name: "pyroscope_symbolizer_duration_seconds", + Help: "Time spent performing symbolization", + Buckets: []float64{.01, .05, .1, .5, 1, 5, 10, 30}, + }, + ), + symbolizationLocationTotal: prometheus.NewCounterVec(prometheus.CounterOpts{ + Name: "pyroscope_symbolizer_locations_total", + Help: "Total number of locations processed", + }, []string{"status"}), + } + m.register() + return m +} + +func (m *Metrics) register() { + if m.registerer == nil { + return + } + + collectors := []prometheus.Collector{ + m.debuginfodRequestDuration, + 
m.debuginfodFileSize, + m.debuginfodRequestErrorsTotal, + m.debuginfodRequestsTotal, + m.cacheRequestsTotal, + m.cacheRequestErrorsTotal, + m.cacheHitsTotal, + m.cacheMissesTotal, + m.cacheOperationDuration, + m.cacheExpiredTotal, + m.symbolizationRequestsTotal, + m.symbolizationRequestErrorsTotal, + m.symbolizationDuration, + m.symbolizationLocationTotal, + } + + for _, collector := range collectors { + m.registerer.MustRegister(collector) + } +} + +func (m *Metrics) Unregister() { + if m.registerer == nil { + return + } + + collectors := []prometheus.Collector{ + m.debuginfodRequestDuration, + m.debuginfodFileSize, + m.debuginfodRequestErrorsTotal, + m.debuginfodRequestsTotal, + m.cacheRequestsTotal, + m.cacheRequestErrorsTotal, + m.cacheHitsTotal, + m.cacheMissesTotal, + m.cacheOperationDuration, + m.cacheExpiredTotal, + m.symbolizationRequestsTotal, + m.symbolizationRequestErrorsTotal, + m.symbolizationDuration, + m.symbolizationLocationTotal, + } + + for _, collector := range collectors { + m.registerer.Unregister(collector) + } +} diff --git a/pkg/experiment/symbolizer/symbolizer.go b/pkg/experiment/symbolizer/symbolizer.go new file mode 100644 index 0000000000..89a8907809 --- /dev/null +++ b/pkg/experiment/symbolizer/symbolizer.go @@ -0,0 +1,195 @@ +package symbolizer + +import ( + "context" + "debug/dwarf" + "debug/elf" + "flag" + "fmt" + "io" + "os" + "time" + + "github.com/prometheus/client_golang/prometheus" + + objstoreclient "github.com/grafana/pyroscope/pkg/objstore/client" +) + +// DwarfResolver implements the liner interface +type DwarfResolver struct { + debugData *dwarf.Data + dbgFile *DWARFInfo + file *elf.File +} + +func NewDwarfResolver(f *elf.File) (SymbolResolver, error) { + debugData, err := f.DWARF() + if err != nil { + return nil, fmt.Errorf("read DWARF data: %w", err) + } + + debugInfo := NewDWARFInfo(debugData) + + return &DwarfResolver{ + debugData: debugData, + dbgFile: debugInfo, + file: f, + }, nil +} + +func (d *DwarfResolver) 
ResolveAddress(ctx context.Context, pc uint64) ([]SymbolLocation, error) { + return d.dbgFile.ResolveAddress(ctx, pc) +} + +func (d *DwarfResolver) Close() error { + return d.file.Close() +} + +type Config struct { + DebuginfodURL string `yaml:"debuginfod_url"` + Cache CacheConfig `yaml:"cache"` + Storage objstoreclient.Config `yaml:"storage"` +} + +type Symbolizer struct { + client DebuginfodClient + cache DebugInfoCache + metrics *Metrics +} + +func NewSymbolizer(client DebuginfodClient, cache DebugInfoCache, reg prometheus.Registerer) *Symbolizer { + if cache == nil { + cache = NewNullCache() + } + return &Symbolizer{ + client: client, + cache: cache, + metrics: NewMetrics(reg), + } +} + +func NewFromConfig(ctx context.Context, cfg Config, reg prometheus.Registerer) (*Symbolizer, error) { + metrics := NewMetrics(reg) + + // Default to no caching + var cache = NewNullCache() + + if cfg.Cache.Enabled { + if cfg.Storage.Backend == "" { + return nil, fmt.Errorf("storage configuration required when cache is enabled") + } + bucket, err := objstoreclient.NewBucket(ctx, cfg.Storage, "debuginfo") + if err != nil { + return nil, fmt.Errorf("create debug info storage: %w", err) + } + cache = NewObjstoreCache(bucket, cfg.Cache.MaxAge, metrics) + } + + client := NewDebuginfodClient(cfg.DebuginfodURL, metrics) + + return &Symbolizer{ + client: client, + cache: cache, + metrics: metrics, + }, nil +} + +func (s *Symbolizer) Symbolize(ctx context.Context, req Request) error { + start := time.Now() + defer func() { + s.metrics.symbolizationDuration.Observe(time.Since(start).Seconds()) + }() + + debugReader, err := s.cache.Get(ctx, req.BuildID) + if err == nil { + defer debugReader.Close() + return s.symbolizeFromReader(ctx, debugReader, req) + } + + // Cache miss - fetch from debuginfod + filepath, err := s.client.FetchDebuginfo(req.BuildID) + if err != nil { + s.metrics.symbolizationRequestErrorsTotal.WithLabelValues("debuginfod_error").Inc() + return fmt.Errorf("fetch 
debuginfo: %w", err)
+	}
+
+	// Open for symbolization
+	f, err := os.Open(filepath)
+	if err != nil {
+		s.metrics.symbolizationRequestErrorsTotal.WithLabelValues("file_error").Inc()
+		return fmt.Errorf("open debug file: %w", err)
+	}
+	defer f.Close()
+
+	// Cache it for future use
+	if _, err := f.Seek(0, 0); err != nil {
+		return fmt.Errorf("seek file: %w", err)
+	}
+	if err := s.cache.Put(ctx, req.BuildID, f); err != nil {
+		// Best effort: a failed cache write must not fail the symbolization
+		// request, but it should not be silently dropped either.
+		s.metrics.cacheRequestErrorsTotal.WithLabelValues("put", "write").Inc()
+	}
+
+	// Seek back to start for symbolization
+	if _, err := f.Seek(0, 0); err != nil {
+		return fmt.Errorf("seek file: %w", err)
+	}
+
+	return s.symbolizeFromReader(ctx, f, req)
+}
+
+// symbolizeFromReader parses the ELF/DWARF debug info behind r and fills in
+// loc.Lines for every location in req. r must additionally implement
+// io.ReaderAt (an *os.File does); otherwise an error is returned instead of
+// panicking on the type assertion.
+func (s *Symbolizer) symbolizeFromReader(ctx context.Context, r io.ReadCloser, req Request) error {
+	// BUG FIX: the unchecked assertion r.(io.ReaderAt) panicked for readers
+	// (e.g. cache readers) that do not support random access.
+	ra, ok := r.(io.ReaderAt)
+	if !ok {
+		s.metrics.symbolizationRequestErrorsTotal.WithLabelValues("elf_error").Inc()
+		return fmt.Errorf("debug info reader does not implement io.ReaderAt")
+	}
+	elfFile, err := elf.NewFile(io.NewSectionReader(ra, 0, 1<<63-1))
+	if err != nil {
+		s.metrics.symbolizationRequestErrorsTotal.WithLabelValues("elf_error").Inc()
+		return fmt.Errorf("create ELF file from reader: %w", err)
+	}
+	defer elfFile.Close()
+
+	// Get executable info for address normalization
+	ei, err := ExecutableInfoFromELF(elfFile)
+	if err != nil {
+		s.metrics.symbolizationRequestErrorsTotal.WithLabelValues("elf_info_error").Inc()
+		return fmt.Errorf("executable info from ELF: %w", err)
+	}
+
+	// Create liner
+	liner, err := NewDwarfResolver(elfFile)
+	if err != nil {
+		s.metrics.symbolizationRequestErrorsTotal.WithLabelValues("dwarf_error").Inc()
+		// BUG FIX: the error was previously only counted, leaving liner nil
+		// and panicking on liner.ResolveAddress below.
+		return fmt.Errorf("create DWARF resolver: %w", err)
+	}
+	//defer liner.Close()
+
+	for _, mapping := range req.Mappings {
+		for _, loc := range mapping.Locations {
+			addr, err := MapRuntimeAddress(loc.Address, ei, Mapping{
+				Start:  loc.Mapping.Start,
+				Limit:  loc.Mapping.Limit,
+				Offset: loc.Mapping.Offset,
+			})
+			if err != nil {
+				s.metrics.symbolizationLocationTotal.WithLabelValues("error").Inc()
+				return fmt.Errorf("normalize address: %w", err)
+			}
+
+			// Get source lines for the address
+			lines, err := liner.ResolveAddress(ctx, addr)
+			if err != nil {
+				
s.metrics.symbolizationLocationTotal.WithLabelValues("error").Inc() + return fmt.Errorf("resolve address: %w", err) + } + + loc.Lines = lines + s.metrics.symbolizationLocationTotal.WithLabelValues("success").Inc() + } + } + + return nil +} + +func (cfg *Config) RegisterFlagsWithPrefix(prefix string, f *flag.FlagSet) { + f.StringVar(&cfg.DebuginfodURL, prefix+".debuginfod-url", "https://debuginfod.elfutils.org", "URL of the debuginfod server") + + cachePrefix := prefix + ".cache" + f.BoolVar(&cfg.Cache.Enabled, cachePrefix+".enabled", false, "Enable debug info caching") + f.DurationVar(&cfg.Cache.MaxAge, cachePrefix+".max-age", 7*24*time.Hour, "Maximum age of cached debug info") +} diff --git a/pkg/experiment/symbolizer/types.go b/pkg/experiment/symbolizer/types.go new file mode 100644 index 0000000000..87ec145567 --- /dev/null +++ b/pkg/experiment/symbolizer/types.go @@ -0,0 +1,45 @@ +package symbolizer + +import ( + "context" + + pprof "github.com/google/pprof/profile" +) + +// SymbolLocation represents a resolved source code location with function information +type SymbolLocation struct { + Function *pprof.Function + Line int64 +} + +// Location represents a memory address to be symbolized +type Location struct { + ID string + Address uint64 + Lines []SymbolLocation + Mapping *pprof.Mapping +} + +// Request represents a symbolization request for multiple addresses +type Request struct { + BuildID string + Mappings []RequestMapping +} + +type RequestMapping struct { + Locations []*Location +} + +// Mapping describes how a binary section is mapped in memory +type Mapping struct { + Start uint64 + End uint64 + Limit uint64 + Offset uint64 +} + +// SymbolResolver converts memory addresses to source code locations +type SymbolResolver interface { + ResolveAddress(ctx context.Context, addr uint64) ([]SymbolLocation, error) + //Close() error +} diff --git a/pkg/phlaredb/symdb/resolver.go b/pkg/phlaredb/symdb/resolver.go index aa5438577a..1331ffb308 100644 --- 
a/pkg/phlaredb/symdb/resolver.go +++ b/pkg/phlaredb/symdb/resolver.go @@ -11,6 +11,7 @@ import ( googlev1 "github.com/grafana/pyroscope/api/gen/proto/go/google/v1" typesv1 "github.com/grafana/pyroscope/api/gen/proto/go/types/v1" + "github.com/grafana/pyroscope/pkg/experiment/symbolizer" "github.com/grafana/pyroscope/pkg/model" schemav1 "github.com/grafana/pyroscope/pkg/phlaredb/schemas/v1" "github.com/grafana/pyroscope/pkg/pprof" @@ -37,6 +38,8 @@ type Resolver struct { maxNodes int64 sts *typesv1.StackTraceSelector + + symbolizer *symbolizer.Symbolizer } type ResolverOption func(*Resolver) @@ -57,6 +60,12 @@ func WithResolverMaxNodes(n int64) ResolverOption { } } +func WithSymbolizer(s *symbolizer.Symbolizer) ResolverOption { + return func(r *Resolver) { + r.symbolizer = s + } +} + // WithResolverStackTraceSelector specifies the stack trace selector. // Only stack traces that belong to the callSite (have the prefix provided) // will be selected. If empty, the filter is ignored. @@ -273,7 +282,9 @@ func (r *Resolver) withSymbols(ctx context.Context, fn func(*Symbols, *SampleApp if err := p.fetch(ctx); err != nil { return err } - return fn(p.reader.Symbols(), p.samples) + symbols := p.reader.Symbols() + symbols.SetSymbolizer(r.symbolizer) + return fn(symbols, p.samples) })) } return g.Wait() @@ -295,3 +306,18 @@ func (r *Symbols) Tree( ) (*model.Tree, error) { return buildTree(ctx, r, appender, maxNodes) } + +func (r *Symbols) SetSymbolizer(sym *symbolizer.Symbolizer) { + r.Symbolizer = sym +} + +func (r *Symbols) needsDebuginfodSymbolization(loc *schemav1.InMemoryLocation, mapping *schemav1.InMemoryMapping) bool { + if r.Symbolizer == nil { + return false + } + if len(loc.Line) == 0 { + // Must have mapping with build ID + return mapping != nil && mapping.BuildId != 0 + } + return false +} diff --git a/pkg/phlaredb/symdb/resolver_tree.go b/pkg/phlaredb/symdb/resolver_tree.go index 5397e1761b..6e0ef63f6b 100644 --- a/pkg/phlaredb/symdb/resolver_tree.go +++ 
b/pkg/phlaredb/symdb/resolver_tree.go @@ -2,10 +2,13 @@ package symdb import ( "context" + "fmt" "sync" + pprof "github.com/google/pprof/profile" "golang.org/x/sync/errgroup" + "github.com/grafana/pyroscope/pkg/experiment/symbolizer" "github.com/grafana/pyroscope/pkg/iter" "github.com/grafana/pyroscope/pkg/model" schemav1 "github.com/grafana/pyroscope/pkg/phlaredb/schemas/v1" @@ -19,6 +22,14 @@ func buildTree( appender *SampleAppender, maxNodes int64, ) (*model.Tree, error) { + // Try debuginfod symbolization first + if symbols != nil && symbols.Symbolizer != nil { + //nolint:staticcheck + if err := symbolizeLocations(ctx, symbols); err != nil { + // TODO: Log/process error but continue? partial symbolization is better than none + } + } + // If the number of samples is large (> 128K) and the StacktraceResolver // implements the range iterator, we will be building the tree based on // the parent pointer tree of the partition (a copy of). The only exception @@ -239,3 +250,93 @@ func minValue(nodes []Node, maxNodes int64) int64 { } return h[0] } + +func symbolizeLocations(ctx context.Context, symbols *Symbols) error { + var errs []error + + type locToSymbolize struct { + idx int32 + loc *schemav1.InMemoryLocation + mapping *schemav1.InMemoryMapping + } + locsByBuildId := make(map[string][]locToSymbolize) + + // Find all locations needing symbolization + for i, loc := range symbols.Locations { + locCopy := loc + if mapping := &symbols.Mappings[loc.MappingId]; symbols.needsDebuginfodSymbolization(&loc, mapping) { + buildIDStr := symbols.Strings[mapping.BuildId] + locsByBuildId[buildIDStr] = append(locsByBuildId[buildIDStr], locToSymbolize{ + idx: int32(i), + loc: &locCopy, + mapping: mapping, + }) + } + } + + for buildID, locs := range locsByBuildId { + req := symbolizer.Request{ + BuildID: buildID, + Mappings: []symbolizer.RequestMapping{{ + Locations: make([]*symbolizer.Location, len(locs)), + }}, + } + + for i, loc := range locs { + req.Mappings[0].Locations[i] = 
&symbolizer.Location{ + Address: loc.loc.Address, + Mapping: &pprof.Mapping{ + Start: loc.mapping.MemoryStart, + Limit: loc.mapping.MemoryLimit, + Offset: loc.mapping.FileOffset, + BuildID: buildID, + }, + } + } + + if err := symbols.Symbolizer.Symbolize(ctx, req); err != nil { + errs = append(errs, fmt.Errorf("symbolize build ID %s: %w", buildID, err)) + continue + } + + // Store symbolization results back + for i, symLoc := range req.Mappings[0].Locations { + if len(symLoc.Lines) > 0 { + // Get the original location we're updating + locIdx := locs[i].idx + + // Clear the existing lines for the location + symbols.Locations[locIdx].Line = nil + + for _, line := range symLoc.Lines { + // Create string entries first + nameIdx := uint32(len(symbols.Strings)) + symbols.Strings = append(symbols.Strings, line.Function.Name) + + filenameIdx := uint32(len(symbols.Strings)) + symbols.Strings = append(symbols.Strings, line.Function.Filename) + + // Create function entry + funcId := uint32(len(symbols.Functions)) + symbols.Functions = append(symbols.Functions, schemav1.InMemoryFunction{ + Id: uint64(funcId), + Name: nameIdx, + Filename: filenameIdx, + StartLine: uint32(line.Function.StartLine), + }) + + symbols.Locations[locIdx].Line = append(symbols.Locations[locIdx].Line, schemav1.InMemoryLine{ + FunctionId: funcId, + Line: int32(line.Line), + }) + } + } + } + } + + if len(errs) > 0 { + return fmt.Errorf("symbolization errors: %v", errs) + } + + return nil +} diff --git a/pkg/phlaredb/symdb/resolver_tree_test.go b/pkg/phlaredb/symdb/resolver_tree_test.go index 16d5e17af5..006cab047e 100644 --- a/pkg/phlaredb/symdb/resolver_tree_test.go +++ b/pkg/phlaredb/symdb/resolver_tree_test.go @@ -2,12 +2,14 @@ package symdb import ( "context" + "fmt" "testing" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" profilev1 "github.com/grafana/pyroscope/api/gen/proto/go/google/v1" + "github.com/grafana/pyroscope/pkg/experiment/symbolizer" v1 
"github.com/grafana/pyroscope/pkg/phlaredb/schemas/v1" ) @@ -167,3 +169,198 @@ func Test_buildTreeFromParentPointerTrees(t *testing.T) { require.Equal(t, expectedTree, resolved.String()) } + +func Test_buildTree_Symbolization(t *testing.T) { + t.Run("no symbolizer configured", func(t *testing.T) { + stringTable := []string{ + "", + "test", + "hex_address", + } + + p := &profilev1.Profile{ + Sample: []*profilev1.Sample{ + {LocationId: []uint64{1}, Value: []int64{100}}, + }, + Location: []*profilev1.Location{ + { + Id: 1, + MappingId: 1, + Address: 0x1234, + Line: []*profilev1.Line{ + { + FunctionId: 1, + Line: 0, + }, + }, + }, + }, + Function: []*profilev1.Function{ + { + Id: 1, + Name: 2, + SystemName: 2, + Filename: 1, + }, + }, + Mapping: []*profilev1.Mapping{ + { + Id: 1, + MemoryStart: 0x1000, + MemoryLimit: 0x2000, + BuildId: int64(1), + Filename: 1, + }, + }, + StringTable: stringTable, + SampleType: []*profilev1.ValueType{ + {Type: 1, Unit: 1}, + }, + } + + s := newMemSuite(t, nil) // Start with empty suite + const partition = 0 + indexed := s.db.WriteProfileSymbols(partition, p) + + // Get symbols through PartitionWriter + partitionWriter := s.db.PartitionWriter(partition) + symbols := partitionWriter.Symbols() + symbols.Symbolizer = nil + + appender := NewSampleAppender() + appender.AppendMany(indexed[partition].Samples.StacktraceIDs, indexed[partition].Samples.Values) + + tree, err := buildTree(context.Background(), symbols, appender, 0) + require.NoError(t, err) + + require.NotEmpty(t, tree.String()) + require.Contains(t, tree.String(), "hex_address") + require.Equal(t, uint64(4660), symbols.Locations[0].Address) + require.Len(t, symbols.Locations[0].Line, 1) + }) + + t.Run("with symbolizer configured", func(t *testing.T) { + stringTable := []string{ + "", + "test", + "hex_address", + } + + p := &profilev1.Profile{ + Sample: []*profilev1.Sample{ + {LocationId: []uint64{1}, Value: []int64{100}}, + }, + Location: []*profilev1.Location{{ + Id: 1, + 
MappingId: 1, + Address: 0x3c5a, + // No Line info - this is what should get symbolized + }, + }, + Mapping: []*profilev1.Mapping{ + { + Id: 1, + MemoryStart: 0x1000, + MemoryLimit: 0x2000, + BuildId: int64(1), + Filename: 1, + }, + }, + StringTable: stringTable, + SampleType: []*profilev1.ValueType{ + {Type: 1, Unit: 1}, + }, + } + + s := newMemSuite(t, nil) + const partition = 0 + indexed := s.db.WriteProfileSymbols(partition, p) + + partitionWriter := s.db.PartitionWriter(partition) + symbols := partitionWriter.Symbols() + + mockClient := &mockDebuginfodClient{ + fetchFunc: func(buildID string) (string, error) { + return "testdata/unsymbolized.debug", nil + }, + } + sym := symbolizer.NewSymbolizer(mockClient, nil, nil) + symbols.SetSymbolizer(sym) + + appender := NewSampleAppender() + appender.AppendMany(indexed[partition].Samples.StacktraceIDs, indexed[partition].Samples.Values) + + tree, err := buildTree(context.Background(), symbols, appender, 0) + require.NoError(t, err) + + require.NotEmpty(t, tree.String()) + require.Contains(t, tree.String(), "fprintf") + require.NotEmpty(t, symbols.Locations[0].Line) + }) + + t.Run("with symbolizer configured", func(t *testing.T) { + stringTable := []string{ + "", + "test", + "hex_address", + } + + p := &profilev1.Profile{ + Sample: []*profilev1.Sample{ + {LocationId: []uint64{1}, Value: []int64{100}}, + }, + Location: []*profilev1.Location{{ + Id: 1, + MappingId: 1, + Address: 0x3c5a, + // No Line info - this is what should get symbolized + }, + }, + Mapping: []*profilev1.Mapping{ + { + Id: 1, + MemoryStart: 0x1000, + MemoryLimit: 0x2000, + BuildId: int64(1), + Filename: 1, + }, + }, + StringTable: stringTable, + SampleType: []*profilev1.ValueType{ + {Type: 1, Unit: 1}, + }, + } + + s := newMemSuite(t, nil) + const partition = 0 + indexed := s.db.WriteProfileSymbols(partition, p) + + partitionWriter := s.db.PartitionWriter(partition) + symbols := partitionWriter.Symbols() + + mockClient := &mockDebuginfodClient{ + 
fetchFunc: func(buildID string) (string, error) { + return "", fmt.Errorf("symbolization failed") + }, + } + sym := symbolizer.NewSymbolizer(mockClient, nil, nil) + symbols.SetSymbolizer(sym) + + appender := NewSampleAppender() + appender.AppendMany(indexed[partition].Samples.StacktraceIDs, indexed[partition].Samples.Values) + + _, err := buildTree(context.Background(), symbols, appender, 0) + require.NoError(t, err) + }) +} + +type mockDebuginfodClient struct { + fetchFunc func(buildID string) (string, error) +} + +func (m *mockDebuginfodClient) FetchDebuginfo(buildID string) (string, error) { + if m.fetchFunc != nil { + return m.fetchFunc(buildID) + } + return "", nil +} diff --git a/pkg/phlaredb/symdb/symdb.go b/pkg/phlaredb/symdb/symdb.go index def005ebf0..5a8aef7cd7 100644 --- a/pkg/phlaredb/symdb/symdb.go +++ b/pkg/phlaredb/symdb/symdb.go @@ -8,6 +8,7 @@ import ( "time" profilev1 "github.com/grafana/pyroscope/api/gen/proto/go/google/v1" + "github.com/grafana/pyroscope/pkg/experiment/symbolizer" "github.com/grafana/pyroscope/pkg/iter" "github.com/grafana/pyroscope/pkg/phlaredb/block" schemav1 "github.com/grafana/pyroscope/pkg/phlaredb/schemas/v1" @@ -30,6 +31,8 @@ type Symbols struct { Mappings []schemav1.InMemoryMapping Functions []schemav1.InMemoryFunction Strings []string + + Symbolizer *symbolizer.Symbolizer } type PartitionStats struct { diff --git a/pkg/phlaredb/symdb/testdata/unsymbolized.debug b/pkg/phlaredb/symdb/testdata/unsymbolized.debug new file mode 100644 index 0000000000..61cdfba1aa Binary files /dev/null and b/pkg/phlaredb/symdb/testdata/unsymbolized.debug differ