From 61a2b5a059b974ff9bfa519c7cad6fd13a43f6e3 Mon Sep 17 00:00:00 2001 From: Daijiro Fukuda Date: Tue, 26 Mar 2024 17:11:11 +0900 Subject: [PATCH] buffer: fix emit error of race condition After 95438b2eeb4bb5586a4031c1fbc35756f3c12565 (#4342), there is a section where chunks do not have a lock in `write_step_by_step()`. `write_step_by_step()` must ensure their locks until passing them to the block. Otherwise, race condition can occur and it can cause emit error by IOError. Example of warning messages of emit error: [warn]: #0 emit transaction failed: error_class=IOError error="closed stream" location=... [warn]: #0 send an error event stream to @ERROR: error_class=IOError error="closed stream" location=... Signed-off-by: Daijiro Fukuda --- lib/fluent/plugin/buffer.rb | 143 +++++++++++++++++++----------------- 1 file changed, 75 insertions(+), 68 deletions(-) diff --git a/lib/fluent/plugin/buffer.rb b/lib/fluent/plugin/buffer.rb index 0251de3409..80709c12bb 100644 --- a/lib/fluent/plugin/buffer.rb +++ b/lib/fluent/plugin/buffer.rb @@ -764,94 +764,95 @@ def write_step_by_step(metadata, data, format, splits_count, &block) while writing_splits_index < splits.size chunk = get_next_chunk.call errors = [] + # The chunk must be locked until being passed to &block. + chunk.mon_enter modified_chunks << {chunk: chunk, adding_bytesize: 0, errors: errors} - chunk.synchronize do - raise ShouldRetry unless chunk.writable? - staged_chunk_used = true if chunk.staged? - - original_bytesize = committed_bytesize = chunk.bytesize - begin - while writing_splits_index < splits.size - split = splits[writing_splits_index] - formatted_split = format ? format.call(split) : nil - if split.size == 1 # Check BufferChunkOverflowError - determined_bytesize = nil - if @compress != :text - determined_bytesize = nil - elsif formatted_split - determined_bytesize = formatted_split.bytesize - elsif split.first.respond_to?(:bytesize) - determined_bytesize = split.first.bytesize - end + raise ShouldRetry unless chunk.writable? + staged_chunk_used = true if chunk.staged? - if determined_bytesize && determined_bytesize > @chunk_limit_size - # It is a obvious case that BufferChunkOverflowError should be raised here. - # But if it raises here, already processed 'split' or - # the proceeding 'split' will be lost completely. - # So it is a last resort to delay raising such a exception - errors << "a #{determined_bytesize} bytes record (nth: #{writing_splits_index}) is larger than buffer chunk limit size (#{@chunk_limit_size})" - writing_splits_index += 1 - next - end + original_bytesize = committed_bytesize = chunk.bytesize + begin + while writing_splits_index < splits.size + split = splits[writing_splits_index] + formatted_split = format ? format.call(split) : nil - if determined_bytesize.nil? || chunk.bytesize + determined_bytesize > @chunk_limit_size - # The split will (might) cause size over so keep already processed - # 'split' content here (allow performance regression a bit). - chunk.commit - committed_bytesize = chunk.bytesize - end + if split.size == 1 # Check BufferChunkOverflowError + determined_bytesize = nil + if @compress != :text + determined_bytesize = nil + elsif formatted_split + determined_bytesize = formatted_split.bytesize + elsif split.first.respond_to?(:bytesize) + determined_bytesize = split.first.bytesize end - if format - chunk.concat(formatted_split, split.size) - else - chunk.append(split, compress: @compress) + if determined_bytesize && determined_bytesize > @chunk_limit_size + # It is a obvious case that BufferChunkOverflowError should be raised here. + # But if it raises here, already processed 'split' or + # the proceeding 'split' will be lost completely. + # So it is a last resort to delay raising such a exception + errors << "a #{determined_bytesize} bytes record (nth: #{writing_splits_index}) is larger than buffer chunk limit size (#{@chunk_limit_size})" + writing_splits_index += 1 + next end - adding_bytes = chunk.bytesize - committed_bytesize - if chunk_size_over?(chunk) # split size is larger than difference between size_full? and size_over? - chunk.rollback + if determined_bytesize.nil? || chunk.bytesize + determined_bytesize > @chunk_limit_size + # The split will (might) cause size over so keep already processed + # 'split' content here (allow performance regression a bit). + chunk.commit committed_bytesize = chunk.bytesize + end + end - if split.size == 1 # Check BufferChunkOverflowError again - if adding_bytes > @chunk_limit_size - errors << "concatenated/appended a #{adding_bytes} bytes record (nth: #{writing_splits_index}) is larger than buffer chunk limit size (#{@chunk_limit_size})" - writing_splits_index += 1 - next - else - # As already processed content is kept after rollback, then unstaged chunk should be queued. - # After that, re-process current split again. - # New chunk should be allocated, to do it, modify @stage and so on. - synchronize { @stage.delete(modified_metadata) } - staged_chunk_used = false - chunk.unstaged! - break - end - end + if format + chunk.concat(formatted_split, split.size) + else + chunk.append(split, compress: @compress) + end + adding_bytes = chunk.bytesize - committed_bytesize - if chunk_size_full?(chunk) || split.size == 1 - enqueue_chunk_before_retry = true + if chunk_size_over?(chunk) # split size is larger than difference between size_full? and size_over? + chunk.rollback + committed_bytesize = chunk.bytesize + + if split.size == 1 # Check BufferChunkOverflowError again + if adding_bytes > @chunk_limit_size + errors << "concatenated/appended a #{adding_bytes} bytes record (nth: #{writing_splits_index}) is larger than buffer chunk limit size (#{@chunk_limit_size})" + writing_splits_index += 1 + next else - splits_count *= 10 + # As already processed content is kept after rollback, then unstaged chunk should be queued. + # After that, re-process current split again. + # New chunk should be allocated, to do it, modify @stage and so on. + synchronize { @stage.delete(modified_metadata) } + staged_chunk_used = false + chunk.unstaged! + break end + end - raise ShouldRetry + if chunk_size_full?(chunk) || split.size == 1 + enqueue_chunk_before_retry = true + else + splits_count *= 10 end - writing_splits_index += 1 + raise ShouldRetry + end - if chunk_size_full?(chunk) - break - end + writing_splits_index += 1 + + if chunk_size_full?(chunk) + break end - rescue - chunk.purge if chunk.unstaged? # unstaged chunk will leak unless purge it - raise end - - modified_chunks.last[:adding_bytesize] = chunk.bytesize - original_bytesize + rescue + chunk.purge if chunk.unstaged? # unstaged chunk will leak unless purge it + raise end + + modified_chunks.last[:adding_bytesize] = chunk.bytesize - original_bytesize end modified_chunks.each do |data| block.call(data[:chunk], data[:adding_bytesize], data[:errors]) @@ -863,9 +864,15 @@ def write_step_by_step(metadata, data, format, splits_count, &block) if chunk.unstaged? chunk.purge rescue nil end + chunk.mon_exit rescue nil end enqueue_chunk(metadata) if enqueue_chunk_before_retry retry + ensure + modified_chunks.each do |data| + chunk = data[:chunk] + chunk.mon_exit + end end STATS_KEYS = [