proxy: Make sure there is no race condition between sender and recv of resp channel #745
@@ -75,6 +75,25 @@ func (s *ProxyStore) Info(ctx context.Context, r *storepb.InfoRequest) (*storepb
 	return res, nil
 }
 
+type ctxRespSender struct {
+	ctx context.Context
+	ch  chan<- *storepb.SeriesResponse
+}
+
+func newRespCh(ctx context.Context, buffer int) (*ctxRespSender, <-chan *storepb.SeriesResponse, func()) {
+	respCh := make(chan *storepb.SeriesResponse, buffer)
+	return &ctxRespSender{ctx: ctx, ch: respCh}, respCh, func() { close(respCh) }
+}
+
+func (s ctxRespSender) send(r *storepb.SeriesResponse) {
+	select {
+	case <-s.ctx.Done():
+		return
+	case s.ch <- r:
+		return
+	}
+}
+
 // Series returns all series for a requested time range and label matcher. Requested series are taken from other
 // stores and proxied to RPC client. NOTE: Resulted data are not trimmed exactly to min and max time range.
 func (s *ProxyStore) Series(r *storepb.SeriesRequest, srv storepb.Store_SeriesServer) error {
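For context on what ctxRespSender.send buys us, here is a minimal, self-contained sketch of the same pattern (illustrative code, not taken from this PR; the send helper and values are made up). A plain channel send would block forever once nobody reads from the channel anymore, while selecting on ctx.Done() lets the sender give up as soon as the request is cancelled:

package main

import (
	"context"
	"fmt"
	"time"
)

// send delivers v unless ctx is already cancelled (i.e. the receiver has given up),
// mirroring ctxRespSender.send above. With a plain `ch <- v` the goroutine would
// block forever once nothing reads from ch anymore.
func send(ctx context.Context, ch chan<- int, v int) {
	select {
	case <-ctx.Done():
	case ch <- v:
	}
}

func main() {
	ctx, cancel := context.WithCancel(context.Background())
	ch := make(chan int) // unbuffered and never read from, like an abandoned response channel
	cancel()             // simulate the client going away mid-request

	done := make(chan struct{})
	go func() {
		send(ctx, ch, 42) // returns immediately instead of leaking
		close(done)
	}()

	select {
	case <-done:
		fmt.Println("sender exited cleanly after cancellation")
	case <-time.After(time.Second):
		fmt.Println("sender stuck: this is the leak the ctx-aware send avoids")
	}
}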
@@ -94,70 +113,82 @@ func (s *ProxyStore) Series(r *storepb.SeriesRequest, srv storepb.Store_SeriesSe
 	}
 
-	var (
-		seriesSet []storepb.SeriesSet
-		respCh    = make(chan *storepb.SeriesResponse, len(stores)+1)
-		g, gctx   = errgroup.WithContext(srv.Context())
-	)
-
-	var storeDebugMsgs []string
-
-	for _, st := range stores {
-		// We might be able to skip the store if its meta information indicates
-		// it cannot have series matching our query.
-		// NOTE: all matchers are validated in labelsMatches method so we explicitly ignore error.
-		if ok, _ := storeMatches(st, r.MinTime, r.MaxTime, newMatchers...); !ok {
-			storeDebugMsgs = append(storeDebugMsgs, fmt.Sprintf("store %s filtered out", st))
-			continue
-		}
-		storeDebugMsgs = append(storeDebugMsgs, fmt.Sprintf("store %s queried", st))
-
-		sc, err := st.Series(gctx, &storepb.SeriesRequest{
-			MinTime:                 r.MinTime,
-			MaxTime:                 r.MaxTime,
-			Matchers:                newMatchers,
-			Aggregates:              r.Aggregates,
-			MaxResolutionWindow:     r.MaxResolutionWindow,
-			PartialResponseDisabled: r.PartialResponseDisabled,
-		})
-		if err != nil {
-			storeID := fmt.Sprintf("%v", storepb.LabelsToString(st.Labels()))
-			if storeID == "" {
-				storeID = "Store Gateway"
-			}
-			err = errors.Wrapf(err, "fetch series for %s %s", storeID, st)
-			if r.PartialResponseDisabled {
-				level.Error(s.logger).Log("err", err, "msg", "partial response disabled; aborting request")
-				return err
-			}
-			respCh <- storepb.NewWarnSeriesResponse(err)
-			continue
-		}
-
-		seriesSet = append(seriesSet, startStreamSeriesSet(sc, respCh, 10))
-	}
-
-	level.Debug(s.logger).Log("msg", strings.Join(storeDebugMsgs, ";"))
-	g.Go(func() error {
-		defer close(respCh)
+	var (
+		g, gctx = errgroup.WithContext(srv.Context())
+
+		// Allow to buffer max 10 series response.
+		// Each might be quite large (multi chunk long series given by sidecar).
+		respSender, respRecv, closeFn = newRespCh(gctx, 10)
+	)
+
+	g.Go(func() error {
+		var (
+			seriesSet      []storepb.SeriesSet
+			storeDebugMsgs []string
+			r              = &storepb.SeriesRequest{
+				MinTime:                 r.MinTime,
+				MaxTime:                 r.MaxTime,
+				Matchers:                newMatchers,
+				Aggregates:              r.Aggregates,
+				MaxResolutionWindow:     r.MaxResolutionWindow,
+				PartialResponseDisabled: r.PartialResponseDisabled,
+			}
+			wg = &sync.WaitGroup{}
+		)
+
+		defer func() {
+			wg.Wait()
+			closeFn()
+		}()
+
+		for _, st := range stores {
Review comment: If there are a large number of stores, e.g. 100+, we would end up spinning up 100+ goroutines that would then all go and fetch 10 series from their store... does this also risk OOMing us? That could then be 1000 series in memory on the querier as we are merging.

Reply: Hm... goroutines are not a problem, there can be thousands of those. But you are right: the 10-response buffer for each store plus 10 for the global channel makes it more problematic for a large number of stores. Wonder if we should start a ticket to discuss potential optimizations for this. Having a pool of goroutines could work.
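To make the "pool of goroutines" idea from the reply concrete, here is a rough sketch of bounding concurrency with a counting semaphore (illustrative only, not part of this PR; streamAll, fetchSeries and the store names are invented):

package main

import (
	"fmt"
	"sync"
)

// fetchSeries stands in for the per-store Series call; it is purely illustrative.
func fetchSeries(store string) {
	fmt.Println("fetching from", store)
}

// streamAll fetches from every store but never runs more than maxConcurrent
// fetches at once, using a buffered channel as a counting semaphore.
func streamAll(stores []string, maxConcurrent int) {
	var (
		wg  sync.WaitGroup
		sem = make(chan struct{}, maxConcurrent)
	)
	for _, st := range stores {
		st := st
		wg.Add(1)
		go func() {
			defer wg.Done()
			sem <- struct{}{}        // take a slot before talking to the store
			defer func() { <-sem }() // free the slot when done
			fetchSeries(st)
		}()
	}
	wg.Wait()
}

func main() {
	streamAll([]string{"sidecar-0", "sidecar-1", "store-gateway-0"}, 2)
}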
+			// We might be able to skip the store if its meta information indicates
+			// it cannot have series matching our query.
+			// NOTE: all matchers are validated in labelsMatches method so we explicitly ignore error.
+			if ok, _ := storeMatches(st, r.MinTime, r.MaxTime, r.Matchers...); !ok {
+				storeDebugMsgs = append(storeDebugMsgs, fmt.Sprintf("store %s filtered out", st))
+				continue
+			}
+			storeDebugMsgs = append(storeDebugMsgs, fmt.Sprintf("store %s queried", st))
+
+			sc, err := st.Series(gctx, r)
+			if err != nil {
+				storeID := fmt.Sprintf("%v", storepb.LabelsToString(st.Labels()))
+				if storeID == "" {
+					storeID = "Store Gateway"
+				}
+				err = errors.Wrapf(err, "fetch series for %s %s", storeID, st)
+				if r.PartialResponseDisabled {
+					level.Error(s.logger).Log("err", err, "msg", "partial response disabled; aborting request")
+					return err
+				}
+				respSender.send(storepb.NewWarnSeriesResponse(err))
+				continue
+			}
+
+			// Schedule streamSeriesSet that translates gRPC streamed response into seriesSet (if series) or respCh if warnings.
+			seriesSet = append(seriesSet, startStreamSeriesSet(gctx, wg, sc, respSender, st.String(), !r.PartialResponseDisabled))
+		}
+
+		level.Debug(s.logger).Log("msg", strings.Join(storeDebugMsgs, ";"))
 
 		if len(seriesSet) == 0 {
 			// This is indicates that configured StoreAPIs are not the ones end user expects
 			err := errors.New("No store matched for this query")
 			level.Warn(s.logger).Log("err", err, "stores", strings.Join(storeDebugMsgs, ";"))
-			respCh <- storepb.NewWarnSeriesResponse(err)
+			respSender.send(storepb.NewWarnSeriesResponse(err))
 			return nil
 		}
 
 		mergedSet := storepb.MergeSeriesSets(seriesSet...)
 		for mergedSet.Next() {
 			var series storepb.Series
 			series.Labels, series.Chunks = mergedSet.At()
Review comment: Not needed in this PR, but it would be good to add documentation to the iterator interface methods...

Reply: Yes 👍 This came from TSDB I think, so it would be worth contributing such documentation there... although hosting it in Thanos is important as well, since we use the pattern heavily. Where to put it, though? Maybe a blog post?
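As an illustration of the kind of documentation the comment asks for, a sketch of how the iterator methods could be described. Label and AggrChunk below are simplified stand-ins rather than the real storepb types, and this is not the actual interface definition from the repository:

package store

// Label is a simplified stand-in for storepb.Label.
type Label struct{ Name, Value string }

// AggrChunk is a simplified stand-in for storepb.AggrChunk.
type AggrChunk struct{ Raw []byte }

// SeriesSet is an iterator over a sorted set of series.
type SeriesSet interface {
	// Next advances the iterator to the next series. It returns false when the
	// set is exhausted or iteration failed; callers must then consult Err.
	Next() bool
	// At returns the labels and chunks of the series the iterator currently
	// points at. It is only valid after a preceding Next call returned true.
	At() ([]Label, []AggrChunk)
	// Err returns the error, if any, that terminated iteration. It must be
	// checked once Next has returned false.
	Err() error
}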
-			respCh <- storepb.NewSeriesResponse(&series)
+			respSender.send(storepb.NewSeriesResponse(&series))
 		}
 		return mergedSet.Err()
 	})
 
-	for resp := range respCh {
+	for resp := range respRecv {
 		if err := srv.Send(resp); err != nil {
 			return status.Error(codes.Unknown, errors.Wrap(err, "send series response").Error())
 		}
@@ -171,48 +202,74 @@ func (s *ProxyStore) Series(r *storepb.SeriesRequest, srv storepb.Store_SeriesSe
 
 }
 
+type warnSender interface {
+	send(*storepb.SeriesResponse)
+}
+
Review comment: Nit - would move the declaration of this interface up near the ...

Reply: Sooo, I think it can live in both places, but the pattern is to have the consumers of an interface define it, so I vote for leaving it near the consumer. Example references for this pattern:
https://stackoverflow.com/questions/53381694/where-should-we-define-go-interface-in-multiple-consumer-scenario-what-about-i#comment93639200_53381694
https://twitter.com/davecheney/status/942593128355192832?lang=en
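A toy example of the "consumer defines the interface" pattern the reply refers to (all names below are invented for illustration): the consumer declares only the single method it needs, and any sender, including a test fake, satisfies it implicitly.

package main

import "fmt"

// sender is the consumer-side interface, analogous to warnSender in the diff:
// it names only the one behaviour the consumer needs.
type sender interface {
	send(msg string)
}

// consumer depends on the small sender interface, not on any concrete type.
type consumer struct{ out sender }

func (c consumer) warn(msg string) { c.out.send("warning: " + msg) }

// stdoutSender is one possible implementation; a test could pass a fake instead.
type stdoutSender struct{}

func (stdoutSender) send(msg string) { fmt.Println(msg) }

func main() {
	c := consumer{out: stdoutSender{}}
	c.warn("receive series failed")
}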
 // streamSeriesSet iterates over incoming stream of series.
 // All errors are sent out of band via warning channel.
 type streamSeriesSet struct {
 	stream storepb.Store_SeriesClient
-	warnCh chan<- *storepb.SeriesResponse
+	warnCh warnSender
 
 	currSeries *storepb.Series
 	recvCh     chan *storepb.Series
+
+	errMtx sync.Mutex
+	err    error
+
+	name string
 }
 
 func startStreamSeriesSet(
+	ctx context.Context,
+	wg *sync.WaitGroup,
 	stream storepb.Store_SeriesClient,
-	warnCh chan<- *storepb.SeriesResponse,
-	bufferSize int,
+	warnCh warnSender,
+	name string,
+	partialResponse bool,
 ) *streamSeriesSet {
 	s := &streamSeriesSet{
 		stream: stream,
 		warnCh: warnCh,
-		recvCh: make(chan *storepb.Series, bufferSize),
+		recvCh: make(chan *storepb.Series, 10),
+		name:   name,
 	}
-	go s.fetchLoop()
-	return s
-}
-
-func (s *streamSeriesSet) fetchLoop() {
-	defer close(s.recvCh)
-	for {
-		r, err := s.stream.Recv()
-		if err == io.EOF {
-			return
-		}
-		if err != nil {
-			s.warnCh <- storepb.NewWarnSeriesResponse(errors.Wrap(err, "receive series"))
-			return
-		}
-
-		if w := r.GetWarning(); w != "" {
-			s.warnCh <- storepb.NewWarnSeriesResponse(errors.New(w))
-			continue
-		}
-		s.recvCh <- r.GetSeries()
-	}
+	wg.Add(1)
+	go func() {
+		defer wg.Done()
+		defer close(s.recvCh)
+		for {
+			r, err := s.stream.Recv()
+			if err == io.EOF {
+				return
+			}
+
+			if ctx.Err() != nil {
+				return
+			}
+
+			if err != nil {
+				if partialResponse {
+					s.warnCh.send(storepb.NewWarnSeriesResponse(errors.Wrap(err, "receive series")))
+					return
+				}
+
+				s.errMtx.Lock()
+				defer s.errMtx.Unlock()
+				s.err = err
+				return
+			}
+
+			if w := r.GetWarning(); w != "" {
+				s.warnCh.send(storepb.NewWarnSeriesResponse(errors.New(w)))
+				continue
+			}
+			s.recvCh <- r.GetSeries()
+		}
+	}()
+	return s
 }
 
 // Next blocks until new message is received or stream is closed.

@@ -228,7 +285,9 @@ func (s *streamSeriesSet) At() ([]storepb.Label, []storepb.AggrChunk) {
 	return s.currSeries.Labels, s.currSeries.Chunks
 }
 func (s *streamSeriesSet) Err() error {
-	return nil
+	s.errMtx.Lock()
+	defer s.errMtx.Unlock()
+	return errors.Wrap(s.err, s.name)
 }
 
 // matchStore returns true if the given store may hold data for the given label matchers.
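The race in the PR title boils down to who closes the response channel and when. The following stand-alone sketch only models the shape of the fix (it is not the PR code): producers are tracked with a sync.WaitGroup and the channel is closed exactly once, after wg.Wait(), so no sender can hit a closed channel while the receiver drains it with range.

package main

import (
	"fmt"
	"sync"
)

func main() {
	var wg sync.WaitGroup
	out := make(chan int, 10)

	// Producers, analogous to the per-store goroutines; each is tracked by wg.
	for i := 0; i < 3; i++ {
		i := i
		wg.Add(1)
		go func() {
			defer wg.Done()
			out <- i // every send happens before the channel is closed
		}()
	}

	// Close from a single place, strictly after all senders have finished,
	// mirroring the `defer func() { wg.Wait(); closeFn() }()` in Series above.
	go func() {
		wg.Wait()
		close(out)
	}()

	// The receiver, analogous to the `for resp := range respRecv` loop, exits
	// once the channel is closed and drained.
	for v := range out {
		fmt.Println("received", v)
	}
}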
Review comment: Why hardcode 10? Is that something that could impact performance (in both good and bad ways)? If Thanos is running on big machines, might people want to be able to tune that parameter?

Reply: Good question. This is a tradeoff between buffered memory and the time the connection stays open. The problem right now is that each message can be fairly large, because the sidecar sends the whole series at once. Imagine a 5s scrape interval (close to the worst case), a 6-week query, and a single series: the message might be up to 2MB, so a single query would buffer up to 20MB in that worst case, and there can be multiple such queries at once. So keeping the buffer low makes more sense. In terms of exposing this as a setting, we can do it later, but we need to make sure it is easy to understand and that nothing else limits or extends that buffer (e.g. gRPC itself on both ends).
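For reference, a back-of-the-envelope version of the numbers in that reply. The bytes-per-sample figure is an assumption (XOR-compressed chunks typically take one to two bytes per sample before label and framing overhead), so treat the output as an order-of-magnitude estimate only:

package main

import "fmt"

func main() {
	const (
		scrapeIntervalSec = 5.0               // the quoted near-worst-case scrape interval
		queryRangeSec     = 6 * 7 * 24 * 3600 // a 6-week query
		bytesPerSample    = 2.0               // assumed upper bound incl. chunk overhead
		bufferedResponses = 10                // the buffer size hardcoded in this PR
	)
	samples := float64(queryRangeSec) / scrapeIntervalSec
	perMessage := samples * bytesPerSample
	fmt.Printf("samples per series:        %.0f\n", samples)                              // ~725760
	fmt.Printf("per response message:      ~%.1f MB\n", perMessage/1e6)                   // ~1.5 MB
	fmt.Printf("buffered per single query: ~%.0f MB\n", bufferedResponses*perMessage/1e6) // ~15 MB
}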