Skip to content

Commit

Permalink
Optimize frame stats collection
Browse files Browse the repository at this point in the history
Optimize UI performance by calculating frame collection statistics
dynamically. Rather than repeatedly traversing each frame, we now update
the stats upon adding or removing frames. This change mitigates the
significant overhead caused by constantly accessing frame data protected
by RwLock.
  • Loading branch information
andreiltd committed Jun 14, 2023
1 parent c4f6146 commit 9247a73
Show file tree
Hide file tree
Showing 2 changed files with 135 additions and 32 deletions.
132 changes: 119 additions & 13 deletions puffin/src/profile_view.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
use std::collections::HashSet;
use std::sync::{Arc, Mutex};

use crate::{FrameData, FrameSinkId};
use crate::{FrameData, FrameIndex, FrameSinkId};

/// A view of recent and slowest frames, used by GUIs.
#[derive(Clone)]
Expand All @@ -16,19 +17,24 @@ pub struct FrameView {
///
/// Only recommended if you set a large max_recent size.
pack_frames: bool,

/// Maintain stats as we add/remove frames
stats: Stats,
}

impl Default for FrameView {
fn default() -> Self {
let max_recent = 60 * 60 * 5;
let max_slow = 256;
let stats = Default::default();

Self {
recent: std::collections::VecDeque::with_capacity(max_recent),
max_recent,
slowest: std::collections::BinaryHeap::with_capacity(max_slow),
max_slow,
pack_frames: true,
stats,
}
}
}
Expand All @@ -46,6 +52,7 @@ impl FrameView {
// The safe choice is to clear everything:
self.recent.clear();
self.slowest.clear();
self.stats.clear();
}
}

Expand All @@ -57,24 +64,54 @@ impl FrameView {
false
};

if add_to_slowest {
self.slowest.push(OrderedByDuration(new_frame.clone()));
while self.slowest.len() > self.max_slow {
self.slowest.pop();
}
}

if let Some(last) = self.recent.back() {
// Assume there is a viewer viewing the newest frame,
// and compress the previously newest frame to save RAM:
if self.pack_frames {
last.pack();
}

self.stats.add(last);
}

if add_to_slowest {
self.add_slow_frame(&new_frame);
}

self.add_recent_frame(&new_frame);
}

pub fn add_slow_frame(&mut self, new_frame: &Arc<FrameData>) {
self.slowest.push(OrderedByDuration(new_frame.clone()));

while self.slowest.len() > self.max_slow {
if let Some(removed_frame) = self.slowest.pop() {
// Only remove from stats if the frame is not present in recent
if self
.recent
.binary_search_by_key(&removed_frame.0.frame_index(), |f| f.frame_index())
.is_err()
{
self.stats.remove(&removed_frame.0);
}
}
}
}

pub fn add_recent_frame(&mut self, new_frame: &Arc<FrameData>) {
self.recent.push_back(new_frame.clone());

self.recent.push_back(new_frame);
while self.recent.len() > self.max_recent {
self.recent.pop_front();
if let Some(removed_frame) = self.recent.pop_front() {
// Only remove from stats if the frame is not present in slowest
if !self
.slowest
.iter()
.any(|f| removed_frame.frame_index() == f.0.frame_index())
{
self.stats.remove(&removed_frame);
}
}
}
}

Expand Down Expand Up @@ -107,16 +144,23 @@ impl FrameView {

/// All frames sorted chronologically.
pub fn all_uniq(&self) -> Vec<Arc<FrameData>> {
let mut all: Vec<_> = self.slowest.iter().map(|f| f.0.clone()).collect();
all.extend(self.recent.iter().cloned());
let mut all: Vec<_> = self
.slowest
.iter()
.map(|f| f.0.clone())
.chain(self.recent.iter().cloned())
.collect();

all.sort_by_key(|frame| frame.frame_index());
all.dedup_by_key(|frame| frame.frame_index());
all
}

/// Clean history of the slowest frames.
pub fn clear_slowest(&mut self) {
self.slowest.clear();
for frame in self.slowest.drain() {
self.stats.remove(&frame.0);
}
}

/// How many frames of recent history to store.
Expand Down Expand Up @@ -147,6 +191,10 @@ impl FrameView {
self.pack_frames = pack_frames;
}

pub fn stats(&self) -> &Stats {
&self.stats
}

/// Export profile data as a `.puffin` file.
#[cfg(feature = "serialization")]
#[cfg(not(target_arch = "wasm32"))] // compression not supported on wasm
Expand Down Expand Up @@ -271,3 +319,61 @@ impl GlobalFrameView {
self.view.lock().unwrap()
}
}

// ----------------------------------------------------------------------------

/// Collect statistics for maintained frames
#[derive(Clone, Debug, Default)]
pub struct Stats {
unique_frames: HashSet<FrameIndex>,
total_ram_used: usize,
unpacked_frames: usize,
}

impl Stats {
fn add(&mut self, frame: &Arc<FrameData>) {
if self.unique_frames.insert(frame.frame_index()) {
self.total_ram_used = self
.total_ram_used
.saturating_add(frame.bytes_of_ram_used());

self.unpacked_frames = self
.unpacked_frames
.saturating_add(frame.has_unpacked() as usize);
}
}

fn remove(&mut self, frame: &Arc<FrameData>) {
if self.unique_frames.remove(&frame.frame_index()) {
self.total_ram_used = self
.total_ram_used
.saturating_sub(frame.bytes_of_ram_used());

self.unpacked_frames = self
.unpacked_frames
.saturating_sub(frame.has_unpacked() as usize);
}
}

pub fn frames(&self) -> usize {
assert!(self.unique_frames.len() >= self.unpacked_frames);
self.unique_frames.len()
}

pub fn unpacked_frames(&self) -> usize {
assert!(self.unique_frames.len() >= self.unpacked_frames);
self.unpacked_frames
}

pub fn bytes_of_ram_used(&self) -> usize {
assert!(self.unique_frames.len() >= self.unpacked_frames);
self.total_ram_used
}

pub fn clear(&mut self) {
self.unique_frames.clear();

self.unpacked_frames = 0;
self.total_ram_used = 0;
}
}
35 changes: 16 additions & 19 deletions puffin_egui/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -196,8 +196,13 @@ impl AvailableFrames {
}

fn all_uniq(&self) -> Vec<Arc<FrameData>> {
let mut all = self.slowest.clone();
all.extend(self.recent.iter().cloned());
let mut all = self
.slowest
.iter()
.cloned()
.chain(self.recent.iter().cloned())
.collect::<Vec<_>>();

all.sort_by_key(|frame| frame.frame_index());
all.dedup_by_key(|frame| frame.frame_index());
all
Expand Down Expand Up @@ -631,21 +636,17 @@ impl ProfilerUi {

{
let uniq = frames.all_uniq();
let mut bytes = 0;
let mut unpacked = 0;
for frame in &uniq {
bytes += frame.bytes_of_ram_used();
unpacked += frame.has_unpacked() as usize;
}
let stats = frame_view.stats();

ui.label(format!(
"{} frames ({} unpacked) using approximately {:.1} MB.",
uniq.len(),
unpacked,
bytes as f64 * 1e-6
stats.frames(),
stats.unpacked_frames(),
stats.bytes_of_ram_used() as f64 * 1e-6
));

if let Some(frame_view) = frame_view.as_mut() {
max_frames_ui(ui, frame_view);
max_frames_ui(ui, frame_view, &uniq);
if self.paused.is_none() {
max_num_latest_ui(ui, &mut self.max_num_latest);
}
Expand Down Expand Up @@ -835,13 +836,9 @@ fn format_time(nanos: NanoSecond) -> Option<String> {
}
}

fn max_frames_ui(ui: &mut egui::Ui, frame_view: &mut FrameView) {
let uniq = frame_view.all_uniq();

let mut bytes = 0;
for frame in &uniq {
bytes += frame.bytes_of_ram_used();
}
fn max_frames_ui(ui: &mut egui::Ui, frame_view: &mut FrameView, uniq: &Vec<Arc<FrameData>>) {
let stats = frame_view.stats();
let bytes = stats.bytes_of_ram_used();

let frames_per_second = if let (Some(first), Some(last)) = (uniq.first(), uniq.last()) {
let nanos = last.range_ns().1 - first.range_ns().0;
Expand Down

0 comments on commit 9247a73

Please sign in to comment.