Skip to content
This repository has been archived by the owner on Feb 5, 2025. It is now read-only.

Commit

Permalink
Fix issue with drop count calculations (#1256)
Browse files Browse the repository at this point in the history
  • Loading branch information
mlw authored Dec 13, 2023
1 parent 2e69370 commit f8a20d3
Show file tree
Hide file tree
Showing 4 changed files with 39 additions and 24 deletions.
1 change: 0 additions & 1 deletion Source/santad/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -683,7 +683,6 @@ objc_library(
hdrs = ["Metrics.h"],
deps = [
":SNTApplicationCoreMetrics",
"//Source/common:BranchPrediction",
"//Source/common:SNTCommonEnums",
"//Source/common:SNTLogging",
"//Source/common:SNTMetricSet",
Expand Down
2 changes: 1 addition & 1 deletion Source/santad/Metrics.h
Original file line number Diff line number Diff line change
Expand Up @@ -96,7 +96,7 @@ class Metrics : public std::enable_shared_from_this<Metrics> {

private:
struct SequenceStats {
uint64_t seq_num = 0;
uint64_t next_seq_num = 0;
int64_t drops = 0;
};

Expand Down
33 changes: 18 additions & 15 deletions Source/santad/Metrics.mm
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,6 @@

#include <memory>

#include "Source/common/BranchPrediction.h"
#import "Source/common/SNTLogging.h"
#import "Source/common/SNTXPCMetricServiceInterface.h"
#import "Source/santad/SNTApplicationCoreMetrics.h"
Expand Down Expand Up @@ -295,21 +294,26 @@
]];
}

for (const auto &kv : drop_cache_) {
if (kv.second.drops > 0) {
NSString *processorName = ProcessorToString(std::get<Processor>(kv.first));
NSString *eventName = EventTypeToString(std::get<es_event_type_t>(kv.first));
for (auto &[key, stats] : drop_cache_) {
if (stats.drops > 0) {
NSString *processorName = ProcessorToString(std::get<Processor>(key));
NSString *eventName = EventTypeToString(std::get<es_event_type_t>(key));

[drop_counts_ incrementBy:kv.second.drops forFieldValues:@[ processorName, eventName ]];
[drop_counts_ incrementBy:stats.drops forFieldValues:@[ processorName, eventName ]];

// Reset drops to 0, but leave sequence number intact. Sequence numbers
// must persist so that accurate drops can be counted.
stats.drops = 0;
}
}

// Reset the maps so the next cycle begins with a clean state
// IMPORTANT: Do not reset drop_cache_, the sequence numbers must persist
// for accurate accounting
event_counts_cache_ = {};
event_times_cache_ = {};
rate_limit_counts_cache_ = {};
faa_event_counts_cache_ = {};
drop_cache_ = {};
});
}

Expand Down Expand Up @@ -370,11 +374,10 @@
SequenceStats old_global_stats = drop_cache_[global_stats_key];

// Sequence number should always increment by 1.
// Drops detected if the difference between sequence numbers is ever more than 1.
int64_t new_event_drops =
unlikely(msg->seq_num == 0) ? 0 : (msg->seq_num - old_event_stats.seq_num - 1);
int64_t new_global_drops =
unlikely(msg->global_seq_num == 0) ? 0 : (msg->global_seq_num - old_global_stats.seq_num - 1);
// Drops detected if there is a difference between the current sequence
// number and the expected sequence number
int64_t new_event_drops = msg->seq_num - old_event_stats.next_seq_num;
int64_t new_global_drops = msg->global_seq_num - old_global_stats.next_seq_num;

// Only log one or the other, prefer the event specific log.
// For higher volume event types, we'll normally see the event-specific log eventually
Expand All @@ -387,11 +390,11 @@
new_global_drops);
}

drop_cache_[event_stats_key] =
SequenceStats{.seq_num = msg->seq_num, .drops = old_event_stats.drops + new_event_drops};
drop_cache_[event_stats_key] = SequenceStats{.next_seq_num = msg->seq_num + 1,
.drops = old_event_stats.drops + new_event_drops};

drop_cache_[global_stats_key] = SequenceStats{
.seq_num = msg->global_seq_num, .drops = old_global_stats.drops + new_global_drops};
.next_seq_num = msg->global_seq_num + 1, .drops = old_global_stats.drops + new_global_drops};
});
}

Expand Down
27 changes: 20 additions & 7 deletions Source/santad/MetricsTest.mm
Original file line number Diff line number Diff line change
Expand Up @@ -342,9 +342,9 @@ - (void)testUpdateEventStats {

// After the first update, 2 entries exist, one for the event, and one for global
XCTAssertEqual(2, metrics->drop_cache_.size());
XCTAssertEqual(0, metrics->drop_cache_[eventStats].seq_num);
XCTAssertEqual(1, metrics->drop_cache_[eventStats].next_seq_num);
XCTAssertEqual(0, metrics->drop_cache_[eventStats].drops);
XCTAssertEqual(0, metrics->drop_cache_[globalStats].seq_num);
XCTAssertEqual(1, metrics->drop_cache_[globalStats].next_seq_num);
XCTAssertEqual(0, metrics->drop_cache_[globalStats].drops);

// Increment sequence numbers by 1 and check that no drop was detected
Expand All @@ -354,9 +354,9 @@ - (void)testUpdateEventStats {
metrics->UpdateEventStats(Processor::kRecorder, &esMsg);

XCTAssertEqual(2, metrics->drop_cache_.size());
XCTAssertEqual(1, metrics->drop_cache_[eventStats].seq_num);
XCTAssertEqual(2, metrics->drop_cache_[eventStats].next_seq_num);
XCTAssertEqual(0, metrics->drop_cache_[eventStats].drops);
XCTAssertEqual(1, metrics->drop_cache_[globalStats].seq_num);
XCTAssertEqual(2, metrics->drop_cache_[globalStats].next_seq_num);
XCTAssertEqual(0, metrics->drop_cache_[globalStats].drops);

// Now incremenet sequence numbers by a large amount to trigger drop detection
Expand All @@ -366,9 +366,9 @@ - (void)testUpdateEventStats {
metrics->UpdateEventStats(Processor::kRecorder, &esMsg);

XCTAssertEqual(2, metrics->drop_cache_.size());
XCTAssertEqual(11, metrics->drop_cache_[eventStats].seq_num);
XCTAssertEqual(12, metrics->drop_cache_[eventStats].next_seq_num);
XCTAssertEqual(9, metrics->drop_cache_[eventStats].drops);
XCTAssertEqual(11, metrics->drop_cache_[globalStats].seq_num);
XCTAssertEqual(12, metrics->drop_cache_[globalStats].next_seq_num);
XCTAssertEqual(9, metrics->drop_cache_[globalStats].drops);
}

Expand Down Expand Up @@ -418,6 +418,13 @@ - (void)testFlushMetrics {
XCTAssertEqual(metrics->faa_event_counts_cache_.size(), 1);
XCTAssertEqual(metrics->drop_cache_.size(), 2);

EventStatsTuple eventStats{Processor::kRecorder, esMsgWithDrops.event_type};
EventStatsTuple globalStats{Processor::kRecorder, ES_EVENT_TYPE_LAST};
XCTAssertEqual(metrics->drop_cache_[eventStats].next_seq_num, 124);
XCTAssertEqual(metrics->drop_cache_[eventStats].drops, 123);
XCTAssertEqual(metrics->drop_cache_[globalStats].next_seq_num, 124);
XCTAssertEqual(metrics->drop_cache_[globalStats].drops, 123);

metrics->FlushMetrics();

// Expected call count is 8:
Expand All @@ -436,7 +443,13 @@ - (void)testFlushMetrics {
XCTAssertEqual(metrics->event_times_cache_.size(), 0);
XCTAssertEqual(metrics->rate_limit_counts_cache_.size(), 0);
XCTAssertEqual(metrics->faa_event_counts_cache_.size(), 0);
XCTAssertEqual(metrics->drop_cache_.size(), 0);
// Note: The drop_cache_ should not be reset back to size 0. Instead, each
// entry has the sequence number left intact, but drop counts reset to 0.
XCTAssertEqual(metrics->drop_cache_.size(), 2);
XCTAssertEqual(metrics->drop_cache_[eventStats].next_seq_num, 124);
XCTAssertEqual(metrics->drop_cache_[eventStats].drops, 0);
XCTAssertEqual(metrics->drop_cache_[globalStats].next_seq_num, 124);
XCTAssertEqual(metrics->drop_cache_[globalStats].drops, 0);
}

@end

0 comments on commit f8a20d3

Please sign in to comment.