Skip to content

Commit

Permalink
Detect black holes based on loss bursts for robustness
Browse files Browse the repository at this point in the history
Sponsored by Stormshield
  • Loading branch information
aochagavia committed Apr 5, 2023
1 parent 75051bd commit 339a40b
Show file tree
Hide file tree
Showing 5 changed files with 122 additions and 46 deletions.
2 changes: 1 addition & 1 deletion quinn-proto/src/config.rs
Original file line number Diff line number Diff line change
Expand Up @@ -445,7 +445,7 @@ impl MtuDiscoveryConfig {

impl Default for MtuDiscoveryConfig {
fn default() -> Self {
MtuDiscoveryConfig {
Self {
interval: Duration::from_secs(600),
upper_bound: 1452,
black_hole_cooldown: Duration::from_secs(60),
Expand Down
15 changes: 14 additions & 1 deletion quinn-proto/src/connection/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1469,14 +1469,27 @@ impl Connection {
lost_packets,
size_of_lost_packets
);

let mut prev_lost_packet_number = None;
for packet in &lost_packets {
let info = self.spaces[pn_space].sent_packets.remove(packet).unwrap(); // safe: lost_packets is populated just above
self.remove_in_flight(pn_space, &info);
for frame in info.stream_frames {
self.streams.retransmit(frame);
}
self.spaces[pn_space].pending |= info.retransmits;
self.path.mtud.on_non_probe_lost(now, *packet, info.size);

// A loss burst is a group of consecutive packets that are declared lost
let new_loss_burst =
prev_lost_packet_number.map_or(true, |prev| packet - prev != 1);

if new_loss_burst {
let black_hole_detected =
self.path.mtud.on_packet_loss_burst(now, *packet, info.size);
self.stats.path.black_holes_detected += u64::from(black_hole_detected);
}

prev_lost_packet_number = Some(*packet);
}
// Don't apply congestion penalty for lost ack-only packets
let lost_ack_eliciting = old_bytes_in_flight != self.in_flight.bytes;
Expand Down
148 changes: 104 additions & 44 deletions quinn-proto/src/connection/mtud.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,11 +11,8 @@ pub(crate) struct MtuDiscovery {
current_mtu: u16,
/// The state of the MTU discovery, if enabled
state: Option<EnabledMtuDiscovery>,
/// A count of the number of packets with a size > BASE_UDP_PAYLOAD_SIZE lost since
/// the last time a packet with size equal to the current MTU was acknowledged
black_hole_counter: u8,
/// The largest acked packet of size `current_mtu`
largest_acked_mtu_sized_packet: Option<u64>,
/// The state of the black hole detector
black_hole_detector: BlackHoleDetector,
}

impl MtuDiscovery {
Expand All @@ -30,7 +27,7 @@ impl MtuDiscovery {
);

let mut mtud =
MtuDiscovery::with_state(initial_plpmtu, Some(EnabledMtuDiscovery::new(config)));
Self::with_state(initial_plpmtu, Some(EnabledMtuDiscovery::new(config)));

// We might be migrating an existing connection to a new path, in which case the transport
// parameters have already been transmitted, and we already know the value of
Expand All @@ -44,15 +41,14 @@ impl MtuDiscovery {

/// MTU discovery will be disabled and the current MTU will be fixed to the provided value
pub(crate) fn disabled(plpmtu: u16) -> Self {
MtuDiscovery::with_state(plpmtu, None)
Self::with_state(plpmtu, None)
}

fn with_state(current_mtu: u16, state: Option<EnabledMtuDiscovery>) -> Self {
Self {
current_mtu,
state,
black_hole_counter: 0,
largest_acked_mtu_sized_packet: None,
black_hole_detector: BlackHoleDetector::new(),
}
}

Expand Down Expand Up @@ -97,19 +93,13 @@ impl MtuDiscovery {
self.current_mtu = new_mtu;
trace!(max_udp_payload_size = self.current_mtu, "new MTU detected");

// We know for sure the path supports the current MTU
self.black_hole_counter = 0;
}

// Reset the black hole counter if a packet the size of the current MTU or larger
// has been acknowledged
if packet_bytes >= self.current_mtu
&& self
.largest_acked_mtu_sized_packet
.map_or(true, |pn| packet_number > pn)
{
self.black_hole_counter = 0;
self.largest_acked_mtu_sized_packet = Some(packet_number);
self.black_hole_detector.on_probe_acked();
} else {
self.black_hole_detector.on_non_probe_acked(
self.current_mtu,
packet_number,
packet_bytes,
);
}
}

Expand All @@ -131,32 +121,28 @@ impl MtuDiscovery {
}
}

/// Notifies the [`MtuDiscovery`] that a non-probe packet was lost
pub(crate) fn on_non_probe_lost(&mut self, now: Instant, packet_number: u64, packet_size: u16) {
// Ignore packets smaller or equal than the base size
if packet_size <= BASE_UDP_PAYLOAD_SIZE {
return;
}

// Ignore lost packets if we have received an ACK for a more recent MTU-sized packet
/// Notifies the [`MtuDiscovery`] of a new packet loss burst
///
/// Returns true if a black hole was detected
pub(crate) fn on_packet_loss_burst(
&mut self,
now: Instant,
leading_packet_number: u64,
leading_packet_size: u16,
) -> bool {
if self
.largest_acked_mtu_sized_packet
.map_or(false, |pn| packet_number < pn)
.black_hole_detector
.on_packet_loss_burst(leading_packet_number, leading_packet_size)
{
return;
}

// The packet counts towards black hole detection
self.black_hole_counter += 1;
if self.black_hole_counter > BLACK_HOLE_THRESHOLD {
self.black_hole_counter = 0;
self.largest_acked_mtu_sized_packet = None;
self.current_mtu = BASE_UDP_PAYLOAD_SIZE;

if let Some(state) = &mut self.state {
state.on_black_hole_detected(now);
return true;
}
}

false
}
}

Expand Down Expand Up @@ -350,6 +336,80 @@ impl SearchState {
}
}

#[derive(Clone)]
struct BlackHoleDetector {
/// Counts suspicious packet loss bursts since the last time a packet with size equal to the
/// current MTU was acknowledged
///
/// Suspicious packet loss bursts are those that start with a packet of size >
/// BASE_UDP_PAYLOAD_SIZE
loss_burst_counter: u8,
/// The largest acked packet of size `current_mtu`
largest_acked_mtu_sized_packet: Option<u64>,
}

impl BlackHoleDetector {
fn new() -> Self {
Self {
loss_burst_counter: 0,
largest_acked_mtu_sized_packet: None,
}
}

pub(crate) fn on_probe_acked(&mut self) {
// We know for sure the path supports the current MTU
self.loss_burst_counter = 0;
}

pub(crate) fn on_non_probe_acked(
&mut self,
current_mtu: u16,
packet_number: u64,
packet_bytes: u16,
) {
// Reset the black hole counter if a packet the size of the current MTU or larger
// has been acknowledged
if packet_bytes >= current_mtu
&& self
.largest_acked_mtu_sized_packet
.map_or(true, |pn| packet_number > pn)
{
self.loss_burst_counter = 0;
self.largest_acked_mtu_sized_packet = Some(packet_number);
}
}

/// Notifies the [`BlackHoleDetector`] of a packet loss burst
pub(crate) fn on_packet_loss_burst(
&mut self,
leading_packet_number: u64,
leading_packet_size: u16,
) -> bool {
// Ignore leading packets smaller or equal than the base size
if leading_packet_size <= BASE_UDP_PAYLOAD_SIZE {
return false;
}

// Ignore leading packets if we have received an ACK for a more recent MTU-sized packet
if self
.largest_acked_mtu_sized_packet
.map_or(false, |pn| leading_packet_number < pn)
{
return false;
}

// The burst counts towards black hole detection
self.loss_burst_counter += 1;
if self.loss_burst_counter <= BLACK_HOLE_THRESHOLD {
return false;
}

self.loss_burst_counter = 0;
self.largest_acked_mtu_sized_packet = None;
true
}
}

// Corresponds to the RFC's `MAX_PROBES` constant (see
// https://www.rfc-editor.org/rfc/rfc8899#section-5.1.2)
const MAX_PROBE_RETRANSMITS: usize = 3;
Expand Down Expand Up @@ -410,25 +470,25 @@ mod tests {
}

#[test]
fn mtu_discovery_disabled_lost_four_packets_triggers_black_hole_detection() {
fn mtu_discovery_disabled_lost_four_packet_bursts_triggers_black_hole_detection() {
let mut mtud = MtuDiscovery::disabled(1_400);
let now = Instant::now();

for _ in 0..4 {
mtud.on_non_probe_lost(now, 0, 1300);
mtud.on_packet_loss_burst(now, 0, 1300);
}

assert_eq!(mtud.current_mtu, 1200);
assert_matches!(mtud.state, None);
}

#[test]
fn mtu_discovery_lost_four_packets_triggers_black_hole_detection_and_resets_timer() {
fn mtu_discovery_lost_four_packet_bursts_triggers_black_hole_detection_and_resets_timer() {
let mut mtud = default_mtud();
let now = Instant::now();

for _ in 0..4 {
mtud.on_non_probe_lost(now, 0, 1300);
mtud.on_packet_loss_burst(now, 0, 1300);
}

assert_eq!(mtud.current_mtu, 1200);
Expand Down
2 changes: 2 additions & 0 deletions quinn-proto/src/connection/stats.rs
Original file line number Diff line number Diff line change
Expand Up @@ -137,6 +137,8 @@ pub struct PathStats {
/// The amount of PLPMTUD probe packets lost on this path (ignored by `lost_packets` and
/// `lost_bytes`)
pub lost_plpmtud_probes: u64,
/// The number of times a black hole was detected in the path
pub black_holes_detected: u64,
}

/// Connection statistics
Expand Down
1 change: 1 addition & 0 deletions quinn-proto/src/tests/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2136,6 +2136,7 @@ fn blackhole_after_mtu_change_repairs_itself() {
let client_stats = pair.client_conn_mut(client_ch).stats();
assert!(client_stats.path.lost_packets >= 3);
assert!(client_stats.path.congestion_events >= 3);
assert_eq!(client_stats.path.black_holes_detected, 1);
}

#[test]
Expand Down

0 comments on commit 339a40b

Please sign in to comment.