Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add content negotiation support and sharding parameters for Query Frontend #375

Merged
merged 22 commits into from
Dec 7, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
22 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
## master / unreleased

* [CHANGE] Redo tempo-cli with basic command structure and improvements [#385](https://github.com/grafana/tempo/pull/385)
* [CHANGE] Add content negotiation support and sharding parameters to Querier [#375](https://github.com/grafana/tempo/pull/375)

## v0.4.0

Expand Down
12 changes: 7 additions & 5 deletions cmd/tempo-query/tempo/plugin.go
Original file line number Diff line number Diff line change
@@ -1,15 +1,15 @@
package tempo

import (
"bytes"
"context"
"fmt"
"io/ioutil"
"net/http"
"time"

"github.com/golang/protobuf/jsonpb"
"github.com/golang/protobuf/proto"
"github.com/grafana/tempo/pkg/tempopb"
"github.com/grafana/tempo/pkg/util"
"github.com/opentracing/opentracing-go"
ot_log "github.com/opentracing/opentracing-go/log"
"github.com/weaveworks/common/user"
Expand Down Expand Up @@ -59,6 +59,9 @@ func (b *Backend) GetTrace(ctx context.Context, traceID jaeger.TraceID) (*jaeger
req.Header.Set(user.OrgIDHeaderName, tenantID)
}

// Set content type to grpc
req.Header.Set(util.AcceptHeaderKey, util.ProtobufTypeHeaderValue)

resp, err := http.DefaultClient.Do(req)
if err != nil {
return nil, fmt.Errorf("failed get to tempo %w", err)
Expand All @@ -74,10 +77,9 @@ func (b *Backend) GetTrace(ctx context.Context, traceID jaeger.TraceID) (*jaeger
return nil, fmt.Errorf("error reading response from tempo: %w", err)
}
out := &tempopb.Trace{}
unmarshaller := &jsonpb.Unmarshaler{}
err = unmarshaller.Unmarshal(bytes.NewReader(body), out)
err = proto.Unmarshal(body, out)
if err != nil {
return nil, fmt.Errorf("failed to unmarshal trace json, err: %w. Tempo response body: %s", err, string(body))
return nil, fmt.Errorf("failed to unmarshal trace proto, err: %w. Tempo response body: %s", err, string(body))
}

span.LogFields(ot_log.String("msg", "otlp to Jaeger"))
Expand Down
2 changes: 1 addition & 1 deletion example/docker-compose/etc/tempo-local.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ storage:
bloom_filter_false_positive: .05 # bloom filter false positive rate. lower values create larger filters but fewer false positives
index_downsample: 10 # number of traces per index record
local:
path: /tmp/tempo
path: /tmp/tempo/blocks
pool:
max_workers: 100 # the worker pool mainly drives querying, but is also used for polling the blocklist
queue_depth: 10000
87 changes: 83 additions & 4 deletions modules/querier/http.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,13 +7,22 @@ import (
"time"

"github.com/golang/protobuf/jsonpb"
"github.com/golang/protobuf/proto"
"github.com/google/uuid"
"github.com/gorilla/mux"
"github.com/grafana/tempo/pkg/tempopb"
"github.com/grafana/tempo/pkg/util"
"github.com/grafana/tempo/tempodb"
"github.com/opentracing/opentracing-go"
ot_log "github.com/opentracing/opentracing-go/log"
"github.com/pkg/errors"
)

const (
TraceIDVar = "traceID"
TraceIDVar = "traceID"
BlockStartKey = "blockStart"
BlockEndKey = "blockEnd"
QueryIngestersKey = "queryIngesters"
)

// TraceByIDHandler is a http.HandlerFunc to retrieve traces
Expand All @@ -22,9 +31,11 @@ func (q *Querier) TraceByIDHandler(w http.ResponseWriter, r *http.Request) {
ctx, cancel := context.WithDeadline(r.Context(), time.Now().Add(q.cfg.QueryTimeout))
defer cancel()

span, ctx := opentracing.StartSpanFromContext(ctx, "Querier.TraceByIDHandler")
defer span.Finish()

vars := mux.Vars(r)
traceID, ok := vars[TraceIDVar]

if !ok {
http.Error(w, "please provide a traceID", http.StatusBadRequest)
return
Expand All @@ -36,10 +47,24 @@ func (q *Querier) TraceByIDHandler(w http.ResponseWriter, r *http.Request) {
return
}

// validate request
blockStart, blockEnd, queryIngesters, err := validateAndSanitizeRequest(r)
if err != nil {
http.Error(w, err.Error(), http.StatusBadRequest)
return
}
span.LogFields(
ot_log.String("msg", "validated request"),
ot_log.String("blockStart", blockStart),
ot_log.String("blockEnd", blockEnd),
ot_log.Bool("queryIngesters", queryIngesters))

resp, err := q.FindTraceByID(ctx, &tempopb.TraceByIDRequest{
TraceID: byteID,
TraceID: byteID,
joe-elliott marked this conversation as resolved.
Show resolved Hide resolved
BlockStart: blockStart,
BlockEnd: blockEnd,
QueryIngesters: queryIngesters,
})

if err != nil {
http.Error(w, err.Error(), http.StatusInternalServerError)
return
Expand All @@ -50,10 +75,64 @@ func (q *Querier) TraceByIDHandler(w http.ResponseWriter, r *http.Request) {
return
}

if r.Header.Get(util.AcceptHeaderKey) == util.ProtobufTypeHeaderValue {
b, err := proto.Marshal(resp.Trace)
if err != nil {
http.Error(w, err.Error(), http.StatusInternalServerError)
return
}
_, err = w.Write(b)
if err != nil {
http.Error(w, err.Error(), http.StatusInternalServerError)
return
}
return
}

marshaller := &jsonpb.Marshaler{}
err = marshaller.Marshal(w, resp.Trace)
if err != nil {
http.Error(w, err.Error(), http.StatusInternalServerError)
return
}
}

// return values are (valid, blockStart, blockEnd, queryIngesters)
func validateAndSanitizeRequest(r *http.Request) (string, string, bool, error) {
// get parameter values
q := r.URL.Query().Get(QueryIngestersKey)
start := r.URL.Query().Get(BlockStartKey)
end := r.URL.Query().Get(BlockEndKey)

// validate queryIngesters. it should either be empty or one of (true|false)
var queryIngesters bool
if len(q) == 0 || q == "true" {
queryIngesters = true
} else if q == "false" {
queryIngesters = false
} else {
return "", "", false, fmt.Errorf("invalid value for queryIngesters %s", q)
}

// validate start. it should either be empty or a valid uuid
if len(start) == 0 {
start = tempodb.BlockIDMin
} else {
_, err := uuid.Parse(start)
if err != nil {
return "", "", false, errors.Wrap(err, "invalid value for blockStart")
}
}

// validate end. it should either be empty or a valid uuid
if len(end) == 0 {
end = tempodb.BlockIDMax
} else {
_, err := uuid.Parse(end)
if err != nil {
return "", "", false, errors.Wrap(err, "invalid value for blockEnd")
}
}

return start, end, queryIngesters, nil
}
56 changes: 29 additions & 27 deletions modules/querier/querier.go
Original file line number Diff line number Diff line change
Expand Up @@ -126,40 +126,42 @@ func (q *Querier) FindTraceByID(ctx context.Context, req *tempopb.TraceByIDReque
span, ctx := opentracing.StartSpanFromContext(ctx, "Querier.FindTraceByID")
defer span.Finish()

key := tempo_util.TokenFor(userID, req.TraceID)
var completeTrace *tempopb.Trace
if req.QueryIngesters {
key := tempo_util.TokenFor(userID, req.TraceID)

const maxExpectedReplicationSet = 3 // 3. b/c frigg it
var descs [maxExpectedReplicationSet]ring.IngesterDesc
replicationSet, err := q.ring.Get(key, ring.Read, descs[:0])
if err != nil {
return nil, errors.Wrap(err, "error finding ingesters in Querier.FindTraceByID")
}
const maxExpectedReplicationSet = 3 // 3. b/c frigg it
var descs [maxExpectedReplicationSet]ring.IngesterDesc
replicationSet, err := q.ring.Get(key, ring.Read, descs[:0])
if err != nil {
return nil, errors.Wrap(err, "error finding ingesters in Querier.FindTraceByID")
}

span.LogFields(ot_log.String("msg", "searching ingesters"))
// get responses from all ingesters in parallel
responses, err := q.forGivenIngesters(ctx, replicationSet, func(client tempopb.QuerierClient) (interface{}, error) {
return client.FindTraceByID(opentracing.ContextWithSpan(ctx, span), req)
})
if err != nil {
return nil, errors.Wrap(err, "error querying ingesters in Querier.FindTraceByID")
}
span.LogFields(ot_log.String("msg", "searching ingesters"))
// get responses from all ingesters in parallel
responses, err := q.forGivenIngesters(ctx, replicationSet, func(client tempopb.QuerierClient) (interface{}, error) {
return client.FindTraceByID(opentracing.ContextWithSpan(ctx, span), req)
})
if err != nil {
return nil, errors.Wrap(err, "error querying ingesters in Querier.FindTraceByID")
}

var completeTrace *tempopb.Trace
for _, r := range responses {
trace := r.response.(*tempopb.TraceByIDResponse).Trace
if trace != nil {
var spanCountA, spanCountB, spanCountTotal int
completeTrace, spanCountA, spanCountB, spanCountTotal = tempo_util.CombineTraceProtos(completeTrace, trace)
span.LogFields(ot_log.String("msg", "combined trace protos"),
ot_log.Int("spansCountA", spanCountA),
ot_log.Int("spansCountB", spanCountB),
ot_log.Int("spansCountTotal", spanCountTotal))
for _, r := range responses {
trace := r.response.(*tempopb.TraceByIDResponse).Trace
if trace != nil {
var spanCountA, spanCountB, spanCountTotal int
completeTrace, spanCountA, spanCountB, spanCountTotal = tempo_util.CombineTraceProtos(completeTrace, trace)
span.LogFields(ot_log.String("msg", "combined trace protos"),
ot_log.Int("spansCountA", spanCountA),
ot_log.Int("spansCountB", spanCountB),
ot_log.Int("spansCountTotal", spanCountTotal))
}
}
}

// if the ingester didn't have it check the store.
if completeTrace == nil {
foundBytes, metrics, err := q.store.Find(opentracing.ContextWithSpan(ctx, span), userID, req.TraceID)
foundBytes, metrics, err := q.store.Find(opentracing.ContextWithSpan(ctx, span), userID, req.TraceID, req.BlockStart, req.BlockEnd)
if err != nil {
return nil, errors.Wrap(err, "error querying store in Querier.FindTraceByID")
}
Expand All @@ -186,7 +188,7 @@ func (q *Querier) FindTraceByID(ctx context.Context, req *tempopb.TraceByIDReque
}

// forGivenIngesters runs f, in parallel, for given ingesters
func (q *Querier) forGivenIngesters(ctx context.Context, replicationSet ring.ReplicationSet, f func(tempopb.QuerierClient) (interface{}, error)) ([]responseFromIngesters, error) {
func (q *Querier) forGivenIngesters(ctx context.Context, replicationSet ring.ReplicationSet, f func(client tempopb.QuerierClient) (interface{}, error)) ([]responseFromIngesters, error) {
results, err := replicationSet.Do(ctx, q.cfg.ExtraQueryDelay, func(ingester *ring.IngesterDesc) (interface{}, error) {
client, err := q.pool.GetClientFor(ingester.Addr)
if err != nil {
Expand Down
Loading