Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add exclude_lines and include_lines options #430

Merged
merged 1 commit into from
Dec 7, 2015
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion filebeat/config/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,9 @@ type HarvesterConfig struct {
BackoffFactor int `yaml:"backoff_factor"`
MaxBackoff string `yaml:"max_backoff"`
MaxBackoffDuration time.Duration
ForceCloseFiles bool `yaml:"force_close_files"`
ForceCloseFiles bool `yaml:"force_close_files"`
ExcludeLines []string `yaml:"exclude_lines"`
IncludeLines []string `yaml:"include_lines"`
}

const (
Expand Down
30 changes: 30 additions & 0 deletions filebeat/docs/configuration.asciidoc
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,36 @@ One of the following input types:

The value that you specify here is used as the `input_type` for each event published to Logstash and Elasticsearch.

===== exclude_lines

A list of regular expressions to match the lines that you want Filebeat to drop. Filebeat drops any line that matches
at least one regular expression in the list. By default, no lines are dropped.

[source,yaml]
-------------------------------------------------------------------------------------
exclude_lines: ["^DBG"]
-------------------------------------------------------------------------------------
This example drops the lines that start with "DBG".

===== include_lines

A list of regular expressions to match the lines that you want Filebeat to export. Filebeat exports only the lines that match at least one regular expression in the list. By default, all lines are exported.

[source,yaml]
-------------------------------------------------------------------------------------
include_lines: ["^ERR", "^WARN"]
-------------------------------------------------------------------------------------
This example exports only the lines that start with "ERR" or "WARN".

Note::
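If both `include_lines` and `exclude_lines` are defined, `include_lines` is applied first, and `exclude_lines` is then applied to the lines that remain. A line that matches both an include pattern and an exclude pattern is dropped. For example, to export all the Apache log lines except the debug (DBG) messages, you can use: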

[source,yaml]
-------------------------------------------------------------------------------------
include_lines: ["apache"]
exclude_lines: ["^DBG"]
-------------------------------------------------------------------------------------

[[configuration-fields]]
===== fields

Expand Down
10 changes: 10 additions & 0 deletions filebeat/etc/beat.yml
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,16 @@ filebeat:
# * stdin: Reads the standard in
input_type: log

# Exclude lines. A list of regular expressions to match. Filebeat drops the lines that
# match any regular expression from the list. If include_lines is also set, it is applied
# before exclude_lines. By default, no lines are dropped.
# exclude_lines: ["^DBG"]

# Include lines. A list of regular expressions to match. Filebeat exports only the lines
# that match any regular expression from the list. If exclude_lines is also set, it is
# applied after include_lines. By default, all the lines are exported.
# include_lines: ["^ERR", "^WARN"]

# Optional additional fields. These fields can be freely picked
# to add additional information to the crawled log files for filtering
#fields:
Expand Down
13 changes: 10 additions & 3 deletions filebeat/etc/filebeat.yml
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,16 @@ filebeat:
# * stdin: Reads the standard in
input_type: log

# Exclude lines. A list of regular expressions to match. Filebeat drops the lines that
# match any regular expression from the list. If include_lines is also set, it is applied
# before exclude_lines. By default, no lines are dropped.
# exclude_lines: ["^DBG"]

# Include lines. A list of regular expressions to match. Filebeat exports only the lines
# that match any regular expression from the list. If exclude_lines is also set, it is
# applied after include_lines. By default, all the lines are exported.
# include_lines: ["^ERR", "^WARN"]

# Optional additional fields. These fields can be freely picked
# to add additional information to the crawled log files for filtering
#fields:
Expand Down Expand Up @@ -163,9 +173,6 @@ output:
# Optional HTTP Path
#path: "/elasticsearch"

# Proxy server URL
# proxy_url: http://proxy:3128

# The number of times a particular Elasticsearch index operation is attempted. If
# the indexing operation doesn't succeed after this many retries, the events are
# dropped. The default is 3.
Expand Down
21 changes: 12 additions & 9 deletions filebeat/harvester/harvester.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ package harvester
import (
"io"
"os"
"regexp"
"time"

"github.com/elastic/beats/filebeat/config"
Expand All @@ -24,15 +25,17 @@ import (
)

// Harvester holds the state used to harvest a single file and ship the lines
// read from it to the spooler.
type Harvester struct {
	// Path is the file path to harvest.
	Path             string
	ProspectorConfig config.ProspectorConfig
	Config           *config.HarvesterConfig
	// Offset is advanced by the number of bytes read once a complete line
	// has been processed.
	Offset int64
	Stat   *FileStat
	// SpoolerChan receives the events that are shipped downstream.
	SpoolerChan chan *input.FileEvent
	encoding    encoding.EncodingFactory
	// file is the file being watched.
	file    FileSource
	backoff time.Duration

	// ExcludeLinesRegexp and IncludeLinesRegexp are the compiled forms of the
	// exclude_lines and include_lines options; NewHarvester fills them in.
	ExcludeLinesRegexp []*regexp.Regexp
	IncludeLinesRegexp []*regexp.Regexp
}

// Contains statistics about the file as of when it was last seen by the prospector
Expand Down
91 changes: 78 additions & 13 deletions filebeat/harvester/log.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ import (
"fmt"
"io"
"os"
"regexp"
"time"

"golang.org/x/text/transform"
Expand All @@ -22,6 +23,7 @@ func NewHarvester(
stat *FileStat,
spooler chan *input.FileEvent,
) (*Harvester, error) {
var err error
encoding, ok := encoding.FindEncoding(cfg.Encoding)
if !ok || encoding == nil {
return nil, fmt.Errorf("unknown encoding('%v')", cfg.Encoding)
Expand All @@ -36,6 +38,14 @@ func NewHarvester(
encoding: encoding,
backoff: prospectorCfg.Harvester.BackoffDuration,
}
h.ExcludeLinesRegexp, err = InitRegexps(cfg.ExcludeLines)
if err != nil {
return h, err
}
h.IncludeLinesRegexp, err = InitRegexps(cfg.IncludeLines)
if err != nil {
return h, err
}
return h, nil
}

Expand Down Expand Up @@ -104,24 +114,50 @@ func (h *Harvester) Harvest() {
// Reset Backoff
h.backoff = h.Config.BackoffDuration

// Sends text to spooler
event := &input.FileEvent{
ReadTime: lastReadTime,
Source: &h.Path,
InputType: h.Config.InputType,
DocumentType: h.Config.DocumentType,
Offset: h.Offset,
Bytes: bytesRead,
Text: &text,
Fields: &h.Config.Fields,
Fileinfo: &info,
if h.shouldExportLine(text) {

// Sends text to spooler
event := &input.FileEvent{
ReadTime: lastReadTime,
Source: &h.Path,
InputType: h.Config.InputType,
DocumentType: h.Config.DocumentType,
Offset: h.Offset,
Bytes: bytesRead,
Text: &text,
Fields: &h.Config.Fields,
Fileinfo: &info,
}

event.SetFieldsUnderRoot(h.Config.FieldsUnderRoot)
h.SpoolerChan <- event // ship the new event downstream
}

// Set Offset
h.Offset += int64(bytesRead) // Update offset if complete line has been processed
}
}

event.SetFieldsUnderRoot(h.Config.FieldsUnderRoot)
h.SpoolerChan <- event // ship the new event downstream
// shouldExportLine decides if the line is exported or not based on
// the include_lines and exclude_lines options.
func (h *Harvester) shouldExportLine(line string) bool {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is it on purpose that both include_lines and exclude_lines can both be set? We should probably mention the order it is checked in the docs.

In case a line is matched by include_line and exclude_line, the line is excluded.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, you can enable both options to have use cases like looking only at the apache logs and excluding the DBG messages from them. I updated the doc to explain this.

if len(h.IncludeLinesRegexp) > 0 {
if !MatchAnyRegexps(h.IncludeLinesRegexp, line) {
// drop line
logp.Debug("harvester", "Drop line as it does not match any of the include patterns %s", line)
return false
}
}
if len(h.ExcludeLinesRegexp) > 0 {
if MatchAnyRegexps(h.ExcludeLinesRegexp, line) {
// drop line
logp.Debug("harvester", "Drop line as it does match one of the exclude patterns%s", line)
return false
}
}

return true

}

// backOff checks the backoff variable and sleeps for the given time
Expand Down Expand Up @@ -297,6 +333,35 @@ func (h *Harvester) handleReadlineError(lastTimeRead time.Time, err error) error
// Stop currently does nothing: its body is empty.
func (h *Harvester) Stop() {
}

// InitRegexps compiles every expression in exprs with regexp.CompilePOSIX
// (POSIX ERE syntax, leftmost-longest matching) and returns the compiled
// patterns in the same order as the input.
//
// For a nil or empty exprs it returns an empty, non-nil slice and a nil error.
// If any expression fails to compile, it stops at the first failure and
// returns a nil slice together with the compile error. The error is returned
// without being logged here, so it is reported once by whichever caller
// handles it.
func InitRegexps(exprs []string) ([]*regexp.Regexp, error) {
	// The final length is known up front, so size the slice once.
	result := make([]*regexp.Regexp, 0, len(exprs))

	for _, exp := range exprs {
		rexp, err := regexp.CompilePOSIX(exp)
		if err != nil {
			return nil, err
		}
		result = append(result, rexp)
	}
	return result, nil
}

// MatchAnyRegexps reports whether text is matched by at least one of the
// given regular expressions. It returns false when regexps is empty.
func MatchAnyRegexps(regexps []*regexp.Regexp, text string) bool {
	for i := 0; i < len(regexps); i++ {
		if !regexps[i].MatchString(text) {
			continue
		}
		// The first hit settles the answer; later patterns are not evaluated.
		return true
	}
	return false
}

// maxConsecutiveEmptyReads is a fixed limit of 100.
// NOTE(review): its consumer is not visible in this excerpt; presumably it caps
// how many consecutive empty reads are tolerated — confirm against the reader code.
const maxConsecutiveEmptyReads = 100

// timedReader keeps track of last time bytes have been read from underlying
Expand Down
27 changes: 27 additions & 0 deletions filebeat/harvester/log_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -111,3 +111,30 @@ func TestLineEndingChars(t *testing.T) {
line = []byte("NR ending \n\r")
assert.Equal(t, 0, lineEndingChars(line))
}

// TestExcludeLine checks that the "^DBG" pattern matches a line starting
// with DBG and does not match a line starting with ERR.
func TestExcludeLine(t *testing.T) {
	patterns, err := InitRegexps([]string{"^DBG"})
	assert.Nil(t, err)

	debugMatched := MatchAnyRegexps(patterns, "DBG: a debug message")
	errorMatched := MatchAnyRegexps(patterns, "ERR: an error message")

	assert.True(t, debugMatched)
	assert.False(t, errorMatched)
}

// TestIncludeLine checks that the "^ERR" and "^WARN" patterns together match
// lines starting with ERR or WARN and do not match a line starting with DBG.
func TestIncludeLine(t *testing.T) {
	patterns, err := InitRegexps([]string{"^ERR", "^WARN"})
	assert.Nil(t, err)

	cases := []struct {
		line    string
		matched bool
	}{
		{"DBG: a debug message", false},
		{"ERR: an error message", true},
		{"WARNING: a simple warning message", true},
	}

	for _, tc := range cases {
		got := MatchAnyRegexps(patterns, tc.line)
		if tc.matched {
			assert.True(t, got)
		} else {
			assert.False(t, got)
		}
	}
}

// TestInitRegexp checks that InitRegexps reports an error for an expression
// that cannot be compiled.
func TestInitRegexp(t *testing.T) {
	invalid := []string{"((((("}

	_, compileErr := InitRegexps(invalid)

	assert.NotNil(t, compileErr)
}
7 changes: 7 additions & 0 deletions filebeat/tests/system/config/filebeat.yml.j2
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,13 @@ filebeat:
{% endfor %}
{% endif %}
fields_under_root: {{"true" if fieldsUnderRoot else "false"}}
{% if include_lines %}
include_lines: {{include_lines}}
{% endif %}
{% if exclude_lines %}
exclude_lines: {{exclude_lines}}
{% endif %}

spool_size:
idle_timeout: 0.1s
registry_file: {{ fb.working_dir + '/' }}{{ registryFile|default(".filebeat")}}
Expand Down
Loading