Skip to content

Commit 7806f1a

Browse files
authored
filebeat/inputs/filestream: add metric for messages truncated (#41667)
* filebeat: log when multiline reader truncates messages. While investigating an SDH, I noticed that although we add the truncated label to log fields, there is no feedback explaining why the messages were truncated. This PR adds a log statement to provide insight whenever multiline messages are truncated.
1 parent 111a480 commit 7806f1a

File tree

6 files changed

+181
-51
lines changed

6 files changed

+181
-51
lines changed

filebeat/docs/inputs/input-filestream.asciidoc

Lines changed: 11 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -175,19 +175,20 @@ include::../inputs/input-filestream-reader-options.asciidoc[]
175175

176176
This input exposes metrics under the <<http-endpoint, HTTP monitoring endpoint>>.
177177
These metrics are exposed under the `/inputs` path. They can be used to
178-
observe the activity of the input.
178+
observe the activity of the input. Note that metrics from processors are not included.
179179

180180
[options="header"]
181181
|=======
182-
| Metric | Description
183-
| `files_opened_total` | Total number of files opened.
184-
| `files_closed_total` | Total number of files closed.
185-
| `files_active` | Number of files currently open (gauge).
186-
| `messages_read_total` | Total number of messages read.
187-
| `bytes_processed_total` | Total number of bytes processed.
188-
| `events_processed_total` | Total number of events processed.
189-
| `processing_errors_total` | Total number of processing errors.
190-
| `processing_time` | Histogram of the elapsed time to process messages (expressed in nanoseconds).
182+
| Metric | Description
183+
| `files_opened_total` | Total number of files opened.
184+
| `files_closed_total` | Total number of files closed.
185+
| `files_active` | Number of files currently open (gauge).
186+
| `messages_read_total` | Total number of messages read.
187+
| `messages_truncated_total` | Total number of messages truncated.
188+
| `bytes_processed_total` | Total number of bytes processed.
189+
| `events_processed_total` | Total number of events processed.
190+
| `processing_errors_total` | Total number of processing errors.
191+
| `processing_time` | Histogram of the elapsed time to process messages (expressed in nanoseconds).
191192
|=======
192193

193194
Note:

filebeat/input/filestream/environment_test.go

Lines changed: 15 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -345,7 +345,7 @@ func (e *inputTestingEnvironment) requireNoEntryInRegistry(filename, inputID str
345345
func (e *inputTestingEnvironment) requireOffsetInRegistryByID(key string, expectedOffset int) {
346346
entry, err := e.getRegistryState(key)
347347
if err != nil {
348-
e.t.Fatalf(err.Error())
348+
e.t.Fatal(err.Error())
349349
}
350350

351351
require.Equal(e.t, expectedOffset, entry.Cursor.Offset)
@@ -416,12 +416,18 @@ func (e *inputTestingEnvironment) waitUntilEventCountCtx(ctx context.Context, co
416416
select {
417417
case <-ctx.Done():
418418
logLines := map[string][]string{}
419-
for _, e := range e.pipeline.GetAllEvents() {
420-
flat := e.Fields.Flatten()
419+
for _, evt := range e.pipeline.GetAllEvents() {
420+
flat := evt.Fields.Flatten()
421421
pathi, _ := flat.GetValue("log.file.path")
422-
path := pathi.(string)
422+
path, ok := pathi.(string)
423+
if !ok {
424+
e.t.Fatalf("waitUntilEventCountCtx: path is not a string: %v", pathi)
425+
}
423426
msgi, _ := flat.GetValue("message")
424-
msg := msgi.(string)
427+
msg, ok := msgi.(string)
428+
if !ok {
429+
e.t.Fatalf("waitUntilEventCountCtx: message is not a string: %v", msgi)
430+
}
425431
logLines[path] = append(logLines[path], msg)
426432
}
427433

@@ -468,7 +474,10 @@ func (e *inputTestingEnvironment) requireEventsReceived(events []string) {
468474
if len(events) == checkedEventCount {
469475
e.t.Fatalf("not enough expected elements")
470476
}
471-
message := evt.Fields["message"].(string)
477+
message, ok := evt.Fields["message"].(string)
478+
if !ok {
479+
e.t.Fatalf("message is not string %+v", evt.Fields["message"])
480+
}
472481
if message == events[checkedEventCount] {
473482
foundEvents[checkedEventCount] = true
474483
}

filebeat/input/filestream/input.go

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@ import (
2222
"fmt"
2323
"io"
2424
"os"
25+
"slices"
2526
"time"
2627

2728
"golang.org/x/text/transform"
@@ -380,6 +381,15 @@ func (inp *filestream) readFromSource(
380381

381382
s.Offset += int64(message.Bytes) + int64(message.Offset)
382383

384+
flags, err := message.Fields.GetValue("log.flags")
385+
if err == nil {
386+
if flags, ok := flags.([]string); ok {
387+
if slices.Contains(flags, "truncated") { //nolint:typecheck,nolintlint // linter fails to infer generics
388+
metrics.MessagesTruncated.Add(1)
389+
}
390+
}
391+
}
392+
383393
metrics.MessagesRead.Inc()
384394
if message.IsEmpty() || inp.isDroppedLine(log, string(message.Content)) {
385395
continue

filebeat/input/filestream/internal/input-logfile/metrics.go

Lines changed: 19 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -29,14 +29,15 @@ import (
2929
type Metrics struct {
3030
unregister func()
3131

32-
FilesOpened *monitoring.Uint // Number of files that have been opened.
33-
FilesClosed *monitoring.Uint // Number of files closed.
34-
FilesActive *monitoring.Uint // Number of files currently open (gauge).
35-
MessagesRead *monitoring.Uint // Number of messages read.
36-
BytesProcessed *monitoring.Uint // Number of bytes processed.
37-
EventsProcessed *monitoring.Uint // Number of events processed.
38-
ProcessingErrors *monitoring.Uint // Number of processing errors.
39-
ProcessingTime metrics.Sample // Histogram of the elapsed time for processing an event.
32+
FilesOpened *monitoring.Uint // Number of files that have been opened.
33+
FilesClosed *monitoring.Uint // Number of files closed.
34+
FilesActive *monitoring.Uint // Number of files currently open (gauge).
35+
MessagesRead *monitoring.Uint // Number of messages read.
36+
MessagesTruncated *monitoring.Uint // Number of messages truncated.
37+
BytesProcessed *monitoring.Uint // Number of bytes processed.
38+
EventsProcessed *monitoring.Uint // Number of events processed.
39+
ProcessingErrors *monitoring.Uint // Number of processing errors.
40+
ProcessingTime metrics.Sample // Histogram of the elapsed time for processing an event.
4041

4142
// Those metrics use the same registry/keys as the log input uses
4243
HarvesterStarted *monitoring.Int
@@ -65,15 +66,16 @@ func NewMetrics(id string) *Metrics {
6566

6667
reg, unreg := inputmon.NewInputRegistry("filestream", id, nil)
6768
m := Metrics{
68-
unregister: unreg,
69-
FilesOpened: monitoring.NewUint(reg, "files_opened_total"),
70-
FilesClosed: monitoring.NewUint(reg, "files_closed_total"),
71-
FilesActive: monitoring.NewUint(reg, "files_active"),
72-
MessagesRead: monitoring.NewUint(reg, "messages_read_total"),
73-
BytesProcessed: monitoring.NewUint(reg, "bytes_processed_total"),
74-
EventsProcessed: monitoring.NewUint(reg, "events_processed_total"),
75-
ProcessingErrors: monitoring.NewUint(reg, "processing_errors_total"),
76-
ProcessingTime: metrics.NewUniformSample(1024),
69+
unregister: unreg,
70+
FilesOpened: monitoring.NewUint(reg, "files_opened_total"),
71+
FilesClosed: monitoring.NewUint(reg, "files_closed_total"),
72+
FilesActive: monitoring.NewUint(reg, "files_active"),
73+
MessagesRead: monitoring.NewUint(reg, "messages_read_total"),
74+
MessagesTruncated: monitoring.NewUint(reg, "messages_truncated_total"),
75+
BytesProcessed: monitoring.NewUint(reg, "bytes_processed_total"),
76+
EventsProcessed: monitoring.NewUint(reg, "events_processed_total"),
77+
ProcessingErrors: monitoring.NewUint(reg, "processing_errors_total"),
78+
ProcessingTime: metrics.NewUniformSample(1024),
7779

7880
HarvesterStarted: monitoring.NewInt(harvesterMetrics, "started"),
7981
HarvesterClosed: monitoring.NewInt(harvesterMetrics, "closed"),

filebeat/input/filestream/metrics_integration_test.go

Lines changed: 124 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -40,9 +40,22 @@ func TestFilestreamMetrics(t *testing.T) {
4040
"close.on_state_change.inactive": "2s",
4141
"prospector.scanner.fingerprint.enabled": false,
4242
"file_identity.native": map[string]any{},
43+
"message_max_bytes": 20,
44+
"parsers": []map[string]interface{}{
45+
{
46+
"multiline": map[string]interface{}{
47+
"type": "pattern",
48+
"pattern": "^multiline",
49+
"negate": true,
50+
"match": "after",
51+
"max_lines": 1,
52+
"timeout": "1s",
53+
},
54+
},
55+
},
4356
})
4457

45-
testlines := []byte("first line\nsecond line\nthird line\n")
58+
testlines := []byte("first line\nsecond line\nthird line\nthis is a very long line exceeding message_max_bytes\nmultiline first line\nmultiline second line\n")
4659
env.mustWriteToFile(testlogName, testlines)
4760

4861
ctx, cancelInput := context.WithCancel(context.Background())
@@ -53,37 +66,132 @@ func TestFilestreamMetrics(t *testing.T) {
5366
env.waitUntilHarvesterIsDone()
5467

5568
checkMetrics(t, "fake-ID", expectedMetrics{
56-
FilesOpened: 1,
57-
FilesClosed: 1,
58-
FilesActive: 0,
59-
MessagesRead: 3,
60-
BytesProcessed: 34,
61-
EventsProcessed: 3,
62-
ProcessingErrors: 0,
69+
FilesOpened: 1,
70+
FilesClosed: 1,
71+
FilesActive: 0,
72+
MessagesRead: 3,
73+
MessagesTruncated: 2,
74+
BytesProcessed: 130,
75+
EventsProcessed: 3,
76+
ProcessingErrors: 0,
77+
})
78+
79+
cancelInput()
80+
env.waitUntilInputStops()
81+
}
82+
83+
func TestFilestreamMessageMaxBytesTruncatedMetric(t *testing.T) {
84+
env := newInputTestingEnvironment(t)
85+
86+
testlogName := "test.log"
87+
inp := env.mustCreateInput(map[string]interface{}{
88+
"id": "fake-ID",
89+
"paths": []string{env.abspath(testlogName)},
90+
"prospector.scanner.check_interval": "24h",
91+
"close.on_state_change.check_interval": "100ms",
92+
"close.on_state_change.inactive": "2s",
93+
"prospector.scanner.fingerprint.enabled": false,
94+
"file_identity.native": map[string]any{},
95+
"message_max_bytes": 20,
96+
})
97+
98+
testlines := []byte("first line\nsecond line\nthird line\nthis is a long line exceeding message_max_bytes\n")
99+
env.mustWriteToFile(testlogName, testlines)
100+
101+
ctx, cancelInput := context.WithCancel(context.Background())
102+
env.startInput(ctx, inp)
103+
104+
env.waitUntilEventCount(4)
105+
env.requireOffsetInRegistry(testlogName, "fake-ID", len(testlines))
106+
env.waitUntilHarvesterIsDone()
107+
108+
checkMetrics(t, "fake-ID", expectedMetrics{
109+
FilesOpened: 1,
110+
FilesClosed: 1,
111+
FilesActive: 0,
112+
MessagesRead: 4,
113+
MessagesTruncated: 1,
114+
BytesProcessed: 82,
115+
EventsProcessed: 4,
116+
ProcessingErrors: 0,
117+
})
118+
119+
cancelInput()
120+
env.waitUntilInputStops()
121+
}
122+
123+
func TestFilestreamMultilineMaxLinesTruncatedMetric(t *testing.T) {
124+
env := newInputTestingEnvironment(t)
125+
126+
testlogName := "test.log"
127+
inp := env.mustCreateInput(map[string]interface{}{
128+
"id": "fake-ID",
129+
"paths": []string{env.abspath(testlogName)},
130+
"prospector.scanner.check_interval": "24h",
131+
"close.on_state_change.check_interval": "100ms",
132+
"close.on_state_change.inactive": "2s",
133+
"prospector.scanner.fingerprint.enabled": false,
134+
"file_identity.native": map[string]any{},
135+
"parsers": []map[string]interface{}{
136+
{
137+
"multiline": map[string]interface{}{
138+
"type": "pattern",
139+
"pattern": "^multiline",
140+
"negate": true,
141+
"match": "after",
142+
"max_lines": 1,
143+
"timeout": "1s",
144+
},
145+
},
146+
},
147+
})
148+
149+
testlines := []byte("first line\nsecond line\nmultiline first line\nmultiline second line\n")
150+
env.mustWriteToFile(testlogName, testlines)
151+
152+
ctx, cancelInput := context.WithCancel(context.Background())
153+
env.startInput(ctx, inp)
154+
155+
env.waitUntilEventCount(3)
156+
env.requireOffsetInRegistry(testlogName, "fake-ID", len(testlines))
157+
env.waitUntilHarvesterIsDone()
158+
159+
checkMetrics(t, "fake-ID", expectedMetrics{
160+
FilesOpened: 1,
161+
FilesClosed: 1,
162+
FilesActive: 0,
163+
MessagesRead: 3,
164+
MessagesTruncated: 1,
165+
BytesProcessed: 66,
166+
EventsProcessed: 3,
167+
ProcessingErrors: 0,
63168
})
64169

65170
cancelInput()
66171
env.waitUntilInputStops()
67172
}
68173

69174
type expectedMetrics struct {
70-
FilesOpened uint64
71-
FilesClosed uint64
72-
FilesActive uint64
73-
MessagesRead uint64
74-
BytesProcessed uint64
75-
EventsProcessed uint64
76-
ProcessingErrors uint64
175+
FilesOpened uint64
176+
FilesClosed uint64
177+
FilesActive uint64
178+
MessagesRead uint64
179+
MessagesTruncated uint64
180+
BytesProcessed uint64
181+
EventsProcessed uint64
182+
ProcessingErrors uint64
77183
}
78184

79185
func checkMetrics(t *testing.T, id string, expected expectedMetrics) {
80-
reg := monitoring.GetNamespace("dataset").GetRegistry().Get(id).(*monitoring.Registry)
186+
reg, ok := monitoring.GetNamespace("dataset").GetRegistry().Get(id).(*monitoring.Registry)
187+
require.True(t, ok, "registry not found")
81188

82189
require.Equal(t, id, reg.Get("id").(*monitoring.String).Get(), "id")
83190
require.Equal(t, "filestream", reg.Get("input").(*monitoring.String).Get(), "input")
84191
require.Equal(t, expected.FilesOpened, reg.Get("files_opened_total").(*monitoring.Uint).Get(), "files_opened_total")
85192
require.Equal(t, expected.FilesClosed, reg.Get("files_closed_total").(*monitoring.Uint).Get(), "files_closed_total")
86193
require.Equal(t, expected.MessagesRead, reg.Get("messages_read_total").(*monitoring.Uint).Get(), "messages_read_total")
194+
require.Equal(t, expected.MessagesTruncated, reg.Get("messages_truncated_total").(*monitoring.Uint).Get(), "messages_truncated_total")
87195
require.Equal(t, expected.BytesProcessed, reg.Get("bytes_processed_total").(*monitoring.Uint).Get(), "bytes_processed_total")
88196
require.Equal(t, expected.EventsProcessed, reg.Get("events_processed_total").(*monitoring.Uint).Get(), "events_processed_total")
89197
require.Equal(t, expected.ProcessingErrors, reg.Get("processing_errors_total").(*monitoring.Uint).Get(), "processing_errors_total")

libbeat/reader/multiline/message_buffer.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -120,11 +120,11 @@ func (b *messageBuffer) addLine(m reader.Message) {
120120
// finalize writes the existing content into the returned message and resets all reader variables.
121121
func (b *messageBuffer) finalize() reader.Message {
122122
if b.truncated > 0 {
123-
b.message.AddFlagsWithKey("log.flags", "truncated")
123+
b.message.AddFlagsWithKey("log.flags", "truncated") //nolint:errcheck // It is safe to ignore the error.
124124
}
125125

126126
if b.numLines > 1 {
127-
b.message.AddFlagsWithKey("log.flags", "multiline")
127+
b.message.AddFlagsWithKey("log.flags", "multiline") //nolint:errcheck // It is safe to ignore the error.
128128
}
129129

130130
// Copy message from existing content

0 commit comments

Comments
 (0)