-
Notifications
You must be signed in to change notification settings - Fork 784
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Critical dependency logging #4988
Changes from 19 commits
0087a0e
39b2533
57baf2d
71ad6a0
dc46e92
b3e5ea0
94573c7
a18c189
908ad21
9dd307d
9a6d862
5b2395b
de2840e
d6ff17e
a158865
1ffac2c
85deec7
46e0ec9
c2b7423
f6c4d27
bedf55f
7e227a0
5bbf27f
6ce1a4e
c222b36
e256953
72ee781
509a421
0d73f76
f25dcb6
5abb2c5
958bb7b
ebf4b29
f7dea99
32bbbfd
b2f9d96
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -6,14 +6,24 @@ use lighthouse_metrics::{ | |
}; | ||
use slog::Logger; | ||
use slog_term::Decorator; | ||
use std::collections::HashMap; | ||
use std::io::{Result, Write}; | ||
use std::path::PathBuf; | ||
use std::time::{Duration, Instant}; | ||
use tracing_logging_layer::TRACING_LOGGING_DEPENDENCIES; | ||
|
||
pub const MAX_MESSAGE_WIDTH: usize = 40; | ||
|
||
pub mod async_record; | ||
mod sse_logging_components; | ||
mod tracing_logging_layer; | ||
mod tracing_metrics_layer; | ||
|
||
pub use sse_logging_components::SSELoggingComponents; | ||
pub use tracing_logging_layer::LoggingLayer; | ||
pub use tracing_logging_layer::NonBlockingFileWriter; | ||
pub use tracing_metrics_layer::MetricsLayer; | ||
use tracing_subscriber::{layer::SubscriberExt, util::SubscriberInitExt}; | ||
|
||
/// The minimum interval between log messages indicating that a queue is full. | ||
const LOG_DEBOUNCE_INTERVAL: Duration = Duration::from_secs(30); | ||
|
@@ -214,6 +224,56 @@ impl TimeLatch { | |
} | ||
} | ||
|
||
pub fn create_tracing_layer(logfile_max_size: u64, base_tracing_log_path: PathBuf) { | ||
let filter_layer = match tracing_subscriber::EnvFilter::try_from_default_env() | ||
.or_else(|_| tracing_subscriber::EnvFilter::try_new("warn")) | ||
{ | ||
Ok(filter) => filter, | ||
Err(e) => { | ||
eprintln!("Failed to initialize dependency tracing {e}"); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think at this point we don't have a useable slogger, so we are forced into prtinln! right? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. yep that's the reason |
||
return; | ||
} | ||
}; | ||
|
||
let mut file_writer_streams: HashMap<String, NonBlockingFileWriter> = HashMap::new(); | ||
|
||
for dependency in TRACING_LOGGING_DEPENDENCIES.iter() { | ||
init_file_writer_stream( | ||
&mut file_writer_streams, | ||
base_tracing_log_path.clone(), | ||
dependency, | ||
logfile_max_size * 1_024 * 1_024, | ||
); | ||
} | ||
|
||
if let Err(e) = tracing_subscriber::fmt() | ||
.with_env_filter(filter_layer) | ||
.finish() | ||
.with(MetricsLayer) | ||
.with(LoggingLayer { | ||
file_writer_streams, | ||
}) | ||
.try_init() | ||
{ | ||
eprintln!("Failed to initialize dependency tracing {e}"); | ||
} | ||
} | ||
|
||
fn init_file_writer_stream( | ||
file_writer_streams: &mut HashMap<String, NonBlockingFileWriter>, | ||
base_path: PathBuf, | ||
file_name: &str, | ||
max_file_size: u64, | ||
) { | ||
let file_path = base_path.join(file_name).with_extension("log"); | ||
let Ok(file_writer) = NonBlockingFileWriter::new(file_path.as_path(), max_file_size) else { | ||
eprintln!("Failed to create tracing file stream for {file_name}"); | ||
return; | ||
}; | ||
|
||
file_writer_streams.insert(file_name.to_string(), file_writer); | ||
} | ||
|
||
/// Return a logger suitable for test usage. | ||
/// | ||
/// By default no logs will be printed, but they can be enabled via | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,137 @@ | ||
use std::collections::HashMap; | ||
use std::fs::{self, create_dir_all, File, OpenOptions}; | ||
use std::io::Write; | ||
use std::path::PathBuf; | ||
use std::sync::mpsc::{self, Sender}; | ||
use std::thread; | ||
|
||
lazy_static! { | ||
pub static ref TRACING_LOGGING_DEPENDENCIES: Vec<String> = | ||
vec!["libp2p".to_string(), "discv5".to_string()]; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. note that none of the
we probably need to decide which of these we want and what to do with enabled logs of deps not on the list @AgeManning There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I'm now capturing libp2p_gossipsub, but can make changes to include some, or all, of the libp2p dependencies based on what we decide There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yeah I think just gossipsub is fine initially. We can maybe add some others in later if things get really spicy. |
||
} | ||
|
||
/// Layer that handles `INFO`, `WARN` and `ERROR` logs emitted per dependency and | ||
/// writes them to a file. Dependencies are enabled via the `RUST_LOG` env flag. | ||
pub struct LoggingLayer { | ||
pub file_writer_streams: HashMap<String, NonBlockingFileWriter>, | ||
} | ||
|
||
impl<S: tracing_core::Subscriber> tracing_subscriber::layer::Layer<S> for LoggingLayer { | ||
fn on_event( | ||
&self, | ||
event: &tracing_core::Event<'_>, | ||
_ctx: tracing_subscriber::layer::Context<'_, S>, | ||
) { | ||
let meta = event.metadata(); | ||
|
||
let target = match meta.target().split_once("::") { | ||
Some((crate_name, _)) => crate_name, | ||
None => "unknown", | ||
}; | ||
|
||
let Some(file_writer) = self.file_writer_streams.get(target) else { | ||
return; | ||
}; | ||
|
||
let mut visitor = LogMessageExtractor { | ||
message: String::default(), | ||
}; | ||
|
||
event.record(&mut visitor); | ||
|
||
match *meta.level() { | ||
tracing_core::Level::INFO => { | ||
let _ = file_writer.write(visitor.message); | ||
} | ||
tracing_core::Level::WARN => { | ||
let _ = file_writer.write(visitor.message); | ||
} | ||
tracing_core::Level::ERROR => { | ||
let _ = file_writer.write(visitor.message); | ||
} | ||
_ => {} | ||
} | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. why are you filtering out debug messages? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. no real reason, I'll add debug messages There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. don't think you should be filtering here by level, since that can be controlled with the |
||
} | ||
} | ||
|
||
pub struct NonBlockingFileWriter { | ||
sender: Sender<String>, | ||
} | ||
|
||
impl NonBlockingFileWriter { | ||
pub fn new(path: &std::path::Path, max_file_size: u64) -> Result<Self, std::io::Error> { | ||
let (sender, receiver) = mpsc::channel(); | ||
let path = path.to_path_buf(); | ||
|
||
thread::spawn(move || { | ||
if !path.exists() { | ||
let mut dir = path.clone(); | ||
dir.pop(); | ||
|
||
// Create the necessary directories for the correct service and network. | ||
if !dir.exists() { | ||
let res = create_dir_all(dir); | ||
|
||
match res { | ||
Ok(_) => (), | ||
Err(e) => { | ||
eprintln!("Failed to create dir: {:?}", e); | ||
return; | ||
} | ||
} | ||
} | ||
} | ||
|
||
let mut file = match OpenOptions::new().create(true).append(true).open(&path) { | ||
Ok(file) => file, | ||
Err(e) => { | ||
eprintln!("Failed to open file: {:?}", e); | ||
return; | ||
} | ||
}; | ||
|
||
for message in receiver { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Maybe I'm missing something here. Shouldn't we have a loop here somewhere. It seems like this thread just reads all the messages, writes them, then ends? It also looks like there is one of these threads per dependency/file. I was expecting an await, or perhaps this tasks continually gets created, but I only saw one initialisation. I'm likely missing something. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
As far as i understand it, the mpsc channel creates a sender and receiver. The receiver blocks until a message is available (as long as sender still exists). https://doc.rust-lang.org/std/sync/mpsc/fn.channel.html So that
There is one thread per dependency, the stream initialization per dependency happens here Might be a bit overkill to spawn a new stream per dependency, I can change it to one stream for all dependencies if that makes more sense There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. ah yeah all good. Sorry have been in tokio async land too long and forgot about std channels :p |
||
let should_clear_file = match NonBlockingFileWriter::get_file_size(&path) { | ||
Ok(file_size) => file_size > max_file_size, | ||
Err(_) => false, | ||
}; | ||
|
||
if should_clear_file { | ||
let _ = NonBlockingFileWriter::clear_file(&path); | ||
} | ||
|
||
if let Err(e) = writeln!(file, "{}", message) { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think it might be useful to tag on the time here. Typically when things go wrong, they break at some specific time. It would be very useful to have a timestamp so we can cross-reference with metrics/lighthouse logs. |
||
eprintln!("Failed to write to file: {:?}", e); | ||
} | ||
} | ||
}); | ||
|
||
Ok(NonBlockingFileWriter { sender }) | ||
} | ||
|
||
pub fn write(&self, message: String) -> Result<(), std::io::Error> { | ||
self.sender | ||
.send(message) | ||
.map_err(|e| std::io::Error::new(std::io::ErrorKind::Other, e)) | ||
} | ||
|
||
fn get_file_size(path: &PathBuf) -> std::io::Result<u64> { | ||
let metadata = fs::metadata(path)?; | ||
Ok(metadata.len()) | ||
} | ||
|
||
fn clear_file(path: &PathBuf) -> std::io::Result<()> { | ||
File::create(path)?; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Is it possible for us to maybe just nuke half the file? Just worried about the case that we get a bug, but we have just recently removed all our logs and have no history. |
||
Ok(()) | ||
} | ||
} | ||
|
||
struct LogMessageExtractor { | ||
message: String, | ||
} | ||
|
||
impl tracing_core::field::Visit for LogMessageExtractor { | ||
fn record_debug(&mut self, _: &tracing_core::Field, value: &dyn std::fmt::Debug) { | ||
self.message = format!("{:?}", value); | ||
} | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,56 @@ | ||
//! Exposes [`MetricsLayer`]: A tracing layer that registers metrics of logging events. | ||
|
||
use lighthouse_metrics as metrics; | ||
|
||
lazy_static! { | ||
/// Count of `INFO` logs registered per enabled dependency. | ||
pub static ref DEP_INFOS_TOTAL: metrics::Result<metrics::IntCounterVec> = | ||
metrics::try_create_int_counter_vec( | ||
"dep_info_total", | ||
"Count of infos logged per enabled dependency", | ||
&["target"] | ||
); | ||
/// Count of `WARN` logs registered per enabled dependency. | ||
pub static ref DEP_WARNS_TOTAL: metrics::Result<metrics::IntCounterVec> = | ||
metrics::try_create_int_counter_vec( | ||
"dep_warn_total", | ||
"Count of warns logged per enabled dependency", | ||
&["target"] | ||
); | ||
/// Count of `ERROR` logs registered per enabled dependency. | ||
pub static ref DEP_ERRORS_TOTAL: metrics::Result<metrics::IntCounterVec> = | ||
metrics::try_create_int_counter_vec( | ||
"dep_error_total", | ||
"Count of errors logged per enabled dependency", | ||
&["target"] | ||
); | ||
} | ||
|
||
/// Layer that registers Prometheus metrics for `INFO`, `WARN` and `ERROR` logs emitted per dependency. | ||
/// Dependencies are enabled via the `RUST_LOG` env flag. | ||
pub struct MetricsLayer; | ||
|
||
impl<S: tracing_core::Subscriber> tracing_subscriber::layer::Layer<S> for MetricsLayer { | ||
fn on_event( | ||
&self, | ||
event: &tracing_core::Event<'_>, | ||
_ctx: tracing_subscriber::layer::Context<'_, S>, | ||
) { | ||
let meta = event.metadata(); | ||
if !meta.is_event() { | ||
// ignore tracing span events | ||
return; | ||
} | ||
let target = match meta.target().split_once("::") { | ||
Some((crate_name, _)) => crate_name, | ||
None => "unknown", | ||
}; | ||
let target = &[target]; | ||
match *meta.level() { | ||
tracing_core::Level::INFO => metrics::inc_counter_vec(&DEP_INFOS_TOTAL, target), | ||
tracing_core::Level::WARN => metrics::inc_counter_vec(&DEP_WARNS_TOTAL, target), | ||
tracing_core::Level::ERROR => metrics::inc_counter_vec(&DEP_ERRORS_TOTAL, target), | ||
_ => {} | ||
} | ||
} | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Maybe we use debug by default?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
this one's for me, will change to debug
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
done in c222b36