Skip to content

Commit

Permalink
Auto merge of rust-lang#116751 - Nadrieril:lint-overlap-per-column, r…
Browse files Browse the repository at this point in the history
…=<try>

Lint overlapping ranges as a separate pass

This reworks the [`overlapping_range_endpoints`](https://doc.rust-lang.org/beta/nightly-rustc/rustc_lint_defs/builtin/static.OVERLAPPING_RANGE_ENDPOINTS.html) lint. My motivations are:

- It was annoying to have this lint entangled with the exhaustiveness algorithm, especially wrt librarification;
- This makes the lint behave consistently.

Here's the consistency story. Take the following matches:
```rust
match (0u8, true) {
    (0..=10, true) => {}
    (10..20, true) => {}
    (10..20, false) => {}
    _ => {}
}
match (true, 0u8) {
    (true, 0..=10) => {}
    (true, 10..20) => {}
    (false, 10..20) => {}
    _ => {}
}
```
There are two semantically consistent options: option 1 we lint all overlaps between the ranges, option 2 we only lint the overlaps that could actually occur (i.e. the ones with `true`). Option 1 is what this PR does. Option 2 is possible but would require the exhaustiveness algorithm to track more things for the sake of the lint. The status quo is that we're inconsistent between the two.

Option 1 generates more false postives, but I prefer it from a maintainer's perspective. I do think the difference is minimal; cases where the difference is observable seem rare.

This PR adds a separate pass, so this will have a perf impact. Let's see how bad, it looked ok locally.
  • Loading branch information
bors committed Oct 15, 2023
2 parents 0d410be + 8872fef commit 600bb87
Show file tree
Hide file tree
Showing 5 changed files with 179 additions and 105 deletions.
91 changes: 17 additions & 74 deletions compiler/rustc_mir_build/src/thir/pattern/deconstruct_pat.rs
Original file line number Diff line number Diff line change
Expand Up @@ -53,22 +53,20 @@ use smallvec::{smallvec, SmallVec};
use rustc_apfloat::ieee::{DoubleS, IeeeFloat, SingleS};
use rustc_data_structures::captures::Captures;
use rustc_data_structures::fx::FxHashSet;
use rustc_hir::{HirId, RangeEnd};
use rustc_hir::RangeEnd;
use rustc_index::Idx;
use rustc_middle::middle::stability::EvalResult;
use rustc_middle::mir;
use rustc_middle::thir::{FieldPat, Pat, PatKind, PatRange};
use rustc_middle::ty::layout::IntegerExt;
use rustc_middle::ty::{self, Ty, TyCtxt, VariantDef};
use rustc_session::lint;
use rustc_span::{Span, DUMMY_SP};
use rustc_target::abi::{FieldIdx, Integer, VariantIdx, FIRST_VARIANT};

use self::Constructor::*;
use self::SliceKind::*;

use super::usefulness::{MatchCheckCtxt, PatCtxt};
use crate::errors::{Overlap, OverlappingRangeEndpoints};

/// Recursively expand this pattern into its subpatterns. Only useful for or-patterns.
fn expand_or_pat<'p, 'tcx>(pat: &'p Pat<'tcx>) -> Vec<&'p Pat<'tcx>> {
Expand Down Expand Up @@ -111,15 +109,15 @@ pub(crate) struct IntRange {

impl IntRange {
#[inline]
fn is_integral(ty: Ty<'_>) -> bool {
pub(super) fn is_integral(ty: Ty<'_>) -> bool {
matches!(ty.kind(), ty::Char | ty::Int(_) | ty::Uint(_) | ty::Bool)
}

fn is_singleton(&self) -> bool {
pub(super) fn is_singleton(&self) -> bool {
self.range.start() == self.range.end()
}

fn boundaries(&self) -> (u128, u128) {
pub(super) fn boundaries(&self) -> (u128, u128) {
(*self.range.start(), *self.range.end())
}

Expand Down Expand Up @@ -177,23 +175,6 @@ impl IntRange {
}
}

fn suspicious_intersection(&self, other: &Self) -> bool {
// `false` in the following cases:
// 1 ---- // 1 ---------- // 1 ---- // 1 ----
// 2 ---------- // 2 ---- // 2 ---- // 2 ----
//
// The following are currently `false`, but could be `true` in the future (#64007):
// 1 --------- // 1 ---------
// 2 ---------- // 2 ----------
//
// `true` in the following cases:
// 1 ------- // 1 -------
// 2 -------- // 2 -------
let (lo, hi) = self.boundaries();
let (other_lo, other_hi) = other.boundaries();
(lo == other_hi || hi == other_lo) && !self.is_singleton() && !other.is_singleton()
}

/// Partition a range of integers into disjoint subranges. This does constructor splitting for
/// integer ranges as explained at the top of the file.
///
Expand Down Expand Up @@ -293,7 +274,7 @@ impl IntRange {
}

/// Only used for displaying the range.
fn to_pat<'tcx>(&self, tcx: TyCtxt<'tcx>, ty: Ty<'tcx>) -> Pat<'tcx> {
pub(super) fn to_pat<'tcx>(&self, tcx: TyCtxt<'tcx>, ty: Ty<'tcx>) -> Pat<'tcx> {
let (lo, hi) = self.boundaries();

let bias = IntRange::signed_bias(tcx, ty);
Expand All @@ -315,51 +296,6 @@ impl IntRange {

Pat { ty, span: DUMMY_SP, kind }
}

/// Lint on likely incorrect range patterns (#63987)
pub(super) fn lint_overlapping_range_endpoints<'a, 'p: 'a, 'tcx: 'a>(
&self,
pcx: &PatCtxt<'_, 'p, 'tcx>,
pats: impl Iterator<Item = &'a DeconstructedPat<'p, 'tcx>>,
column_count: usize,
lint_root: HirId,
) {
if self.is_singleton() {
return;
}

if column_count != 1 {
// FIXME: for now, only check for overlapping ranges on simple range
// patterns. Otherwise with the current logic the following is detected
// as overlapping:
// ```
// match (0u8, true) {
// (0 ..= 125, false) => {}
// (125 ..= 255, true) => {}
// _ => {}
// }
// ```
return;
}

let overlap: Vec<_> = pats
.filter_map(|pat| Some((pat.ctor().as_int_range()?, pat.span())))
.filter(|(range, _)| self.suspicious_intersection(range))
.map(|(range, span)| Overlap {
range: self.intersection(&range).unwrap().to_pat(pcx.cx.tcx, pcx.ty),
span,
})
.collect();

if !overlap.is_empty() {
pcx.cx.tcx.emit_spanned_lint(
lint::builtin::OVERLAPPING_RANGE_ENDPOINTS,
lint_root,
pcx.span,
OverlappingRangeEndpoints { overlap, range: pcx.span },
);
}
}
}

/// Note: this is often not what we want: e.g. `false` is converted into the range `0..=0` and
Expand Down Expand Up @@ -651,7 +587,7 @@ impl<'tcx> Constructor<'tcx> {
_ => None,
}
}
fn as_int_range(&self) -> Option<&IntRange> {
pub(super) fn as_int_range(&self) -> Option<&IntRange> {
match self {
IntRange(range) => Some(range),
_ => None,
Expand Down Expand Up @@ -905,9 +841,9 @@ pub(super) enum ConstructorSet {
/// either fully included in or disjoint from each constructor in the column. This avoids
/// non-trivial intersections like between `0..10` and `5..15`.
#[derive(Debug)]
struct SplitConstructorSet<'tcx> {
present: SmallVec<[Constructor<'tcx>; 1]>,
missing: Vec<Constructor<'tcx>>,
pub(super) struct SplitConstructorSet<'tcx> {
pub(super) present: SmallVec<[Constructor<'tcx>; 1]>,
pub(super) missing: Vec<Constructor<'tcx>>,
/// For the `non_exhaustive_omitted_patterns` lint.
nonexhaustive_enum_missing_visible_variants: bool,
}
Expand Down Expand Up @@ -1039,7 +975,7 @@ impl ConstructorSet {
/// constructors to 1/ determine which constructors of the type (if any) are missing; 2/ split
/// constructors to handle non-trivial intersections e.g. on ranges or slices.
#[instrument(level = "debug", skip(self, pcx, ctors), ret)]
fn split<'a, 'tcx>(
pub(super) fn split<'a, 'tcx>(
&self,
pcx: &PatCtxt<'_, '_, 'tcx>,
ctors: impl Iterator<Item = &'a Constructor<'tcx>> + Clone,
Expand Down Expand Up @@ -1625,6 +1561,13 @@ impl<'p, 'tcx> DeconstructedPat<'p, 'tcx> {
pub(super) fn is_or_pat(&self) -> bool {
matches!(self.ctor, Or)
}
pub(super) fn flatten_or_pat(&'p self) -> SmallVec<[&'p Self; 1]> {
if self.is_or_pat() {
self.iter_fields().flat_map(|p| p.flatten_or_pat()).collect()
} else {
smallvec![self]
}
}

pub(super) fn ctor(&self) -> &Constructor<'tcx> {
&self.ctor
Expand Down
119 changes: 103 additions & 16 deletions compiler/rustc_mir_build/src/thir/pattern/usefulness.rs
Original file line number Diff line number Diff line change
Expand Up @@ -307,8 +307,8 @@
use self::ArmType::*;
use self::Usefulness::*;
use super::deconstruct_pat::{Constructor, ConstructorSet, DeconstructedPat, Fields};
use crate::errors::{NonExhaustiveOmittedPattern, Uncovered};
use super::deconstruct_pat::{Constructor, ConstructorSet, DeconstructedPat, Fields, IntRange};
use crate::errors::{NonExhaustiveOmittedPattern, Overlap, OverlappingRangeEndpoints, Uncovered};

use rustc_data_structures::captures::Captures;

Expand All @@ -317,6 +317,7 @@ use rustc_data_structures::stack::ensure_sufficient_stack;
use rustc_hir::def_id::DefId;
use rustc_hir::HirId;
use rustc_middle::ty::{self, Ty, TyCtxt};
use rustc_session::lint;
use rustc_session::lint::builtin::NON_EXHAUSTIVE_OMITTED_PATTERNS;
use rustc_span::{Span, DUMMY_SP};

Expand Down Expand Up @@ -474,11 +475,6 @@ impl<'p, 'tcx> Matrix<'p, 'tcx> {
Matrix { patterns: vec![] }
}

/// Number of columns of this matrix. `None` is the matrix is empty.
pub(super) fn column_count(&self) -> Option<usize> {
self.patterns.get(0).map(|r| r.len())
}

/// Pushes a new row to the matrix. If the row starts with an or-pattern, this recursively
/// expands it.
fn push(&mut self, row: PatStack<'p, 'tcx>) {
Expand Down Expand Up @@ -826,15 +822,6 @@ fn is_useful<'p, 'tcx>(

let v_ctor = v.head().ctor();
debug!(?v_ctor);
if let Constructor::IntRange(ctor_range) = &v_ctor {
// Lint on likely incorrect range patterns (#63987)
ctor_range.lint_overlapping_range_endpoints(
pcx,
matrix.heads(),
matrix.column_count().unwrap_or(0),
lint_root,
)
}
// We split the head constructor of `v`.
let split_ctors = v_ctor.split(pcx, matrix.heads().map(DeconstructedPat::ctor));
let is_non_exhaustive_and_wild =
Expand Down Expand Up @@ -914,6 +901,102 @@ fn is_useful<'p, 'tcx>(
ret
}

/// Traverse the patterns to warn the user about ranges that overlap on their endpoints.
/// This traverses patterns column-by-column, where a column is the intuitive notion of "subpatterns
/// that inspect the same subvalue". Despite similarities with `is_useful`, this traversal is
/// different. Notably this is linear in the depth of patterns, whereas `is_useful` is worst-case
/// exponential (exhaustiveness is NP-complete).
fn lint_overlapping_range_endpoints<'p, 'tcx>(
cx: &MatchCheckCtxt<'p, 'tcx>,
column: &[&DeconstructedPat<'p, 'tcx>],
lint_root: HirId,
) {
if column.is_empty() {
return;
}
let ty = column[0].ty();
let pcx = &PatCtxt { cx, ty, span: DUMMY_SP, is_top_level: false };

let column_ctors = column.iter().map(|p| p.ctor());
let set = ConstructorSet::for_ty(pcx.cx, pcx.ty).split(pcx, column_ctors);

if IntRange::is_integral(ty) {
// If two ranges overlapped, the split set will contain their intersection as a singleton.
let split_int_ranges = set.present.iter().filter_map(|c| c.as_int_range());
for overlap_range in split_int_ranges.clone() {
if overlap_range.is_singleton() {
let overlap: u128 = overlap_range.boundaries().0;
// Spans of ranges that start or end with the overlap.
let mut prefixes: SmallVec<[_; 1]> = Default::default();
let mut suffixes: SmallVec<[_; 1]> = Default::default();
// Iterate on patterns that contained `overlap`.
for pat in column {
let this_span = pat.span();
let Constructor::IntRange(this_range) = pat.ctor() else { continue };
if this_range.is_singleton() {
// Don't lint when one of the ranges is a singleton.
continue;
}
let mut this_overlaps: SmallVec<[_; 1]> = Default::default();
let (start, end) = this_range.boundaries();
if start == overlap {
if !prefixes.is_empty() {
this_overlaps = prefixes.clone();
}
suffixes.push(this_span)
} else if end == overlap {
if !suffixes.is_empty() {
this_overlaps = suffixes.clone();
}
prefixes.push(this_span)
}
if !this_overlaps.is_empty() {
let overlap_as_pat = overlap_range.to_pat(pcx.cx.tcx, pcx.ty);
let overlaps: Vec<_> = this_overlaps
.into_iter()
.map(|span| Overlap { range: overlap_as_pat.clone(), span })
.collect();
pcx.cx.tcx.emit_spanned_lint(
lint::builtin::OVERLAPPING_RANGE_ENDPOINTS,
lint_root,
this_span,
OverlappingRangeEndpoints { overlap: overlaps, range: this_span },
);
}
}
}
}
}

// Recurse into the fields.
for ctor in set.present {
let arity = ctor.arity(pcx);
if arity == 0 {
continue;
}

// We specialize the column by `ctor`. This gives us `arity`-many columns of patterns. These
// columns may have different lengths in the presence of or-patterns (this is why we can't
// reuse `Matrix`).
let mut specialized_columns: Vec<Vec<_>> = (0..arity).map(|_| Vec::new()).collect();
let relevant_patterns = column.iter().filter(|pat| ctor.is_covered_by(pcx, pat.ctor()));
for pat in relevant_patterns {
let specialized = pat.specialize(pcx, &ctor);
for (subpat, sub_column) in specialized.iter().zip(&mut specialized_columns) {
if subpat.is_or_pat() {
sub_column.extend(subpat.iter_fields())
} else {
sub_column.push(subpat)
}
}
}

for col in specialized_columns.iter() {
lint_overlapping_range_endpoints(cx, col.as_slice(), lint_root);
}
}
}

/// The arm of a match expression.
#[derive(Clone, Copy, Debug)]
pub(crate) struct MatchArm<'p, 'tcx> {
Expand Down Expand Up @@ -982,5 +1065,9 @@ pub(crate) fn compute_match_usefulness<'p, 'tcx>(
WithWitnesses(pats) => pats.into_iter().map(|w| w.single_pattern()).collect(),
NoWitnesses { .. } => bug!(),
};

let pat_column = arms.iter().flat_map(|arm| arm.pat.flatten_or_pat()).collect::<Vec<_>>();
lint_overlapping_range_endpoints(cx, &pat_column, lint_root);

UsefulnessReport { arm_usefulness, non_exhaustiveness_witnesses }
}
1 change: 1 addition & 0 deletions tests/ui/mir/mir_match_test.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
#![feature(exclusive_range_pattern)]
#![allow(overlapping_range_endpoints)]

// run-pass

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,17 +8,17 @@ macro_rules! m {
$t2 => {}
_ => {}
}
}
};
}

fn main() {
m!(0u8, 20..=30, 30..=40); //~ ERROR multiple patterns overlap on their endpoints
m!(0u8, 30..=40, 20..=30); //~ ERROR multiple patterns overlap on their endpoints
m!(0u8, 20..=30, 31..=40);
m!(0u8, 20..=30, 29..=40);
m!(0u8, 20.. 30, 29..=40); //~ ERROR multiple patterns overlap on their endpoints
m!(0u8, 20.. 30, 28..=40);
m!(0u8, 20.. 30, 30..=40);
m!(0u8, 20..30, 29..=40); //~ ERROR multiple patterns overlap on their endpoints
m!(0u8, 20..30, 28..=40);
m!(0u8, 20..30, 30..=40);
m!(0u8, 20..=30, 30..=30);
m!(0u8, 20..=30, 30..=31); //~ ERROR multiple patterns overlap on their endpoints
m!(0u8, 20..=30, 29..=30);
Expand All @@ -28,27 +28,29 @@ fn main() {
m!(0u8, 20..=30, 20);
m!(0u8, 20..=30, 25);
m!(0u8, 20..=30, 30);
m!(0u8, 20.. 30, 29);
m!(0u8, 20..30, 29);
m!(0u8, 20, 20..=30);
m!(0u8, 25, 20..=30);
m!(0u8, 30, 20..=30);

match 0u8 {
0..=10 => {}
20..=30 => {}
10..=20 => {} //~ ERROR multiple patterns overlap on their endpoints
10..=20 => {}
//~^ ERROR multiple patterns overlap on their endpoints
//~| ERROR multiple patterns overlap on their endpoints
_ => {}
}
match (0u8, true) {
(0..=10, true) => {}
(10..20, true) => {} // not detected
(10..20, false) => {}
(10..20, true) => {} //~ ERROR multiple patterns overlap on their endpoints
(10..20, false) => {} //~ ERROR multiple patterns overlap on their endpoints
_ => {}
}
match (true, 0u8) {
(true, 0..=10) => {}
(true, 10..20) => {} //~ ERROR multiple patterns overlap on their endpoints
(false, 10..20) => {}
(false, 10..20) => {} //~ ERROR multiple patterns overlap on their endpoints
_ => {}
}
match Some(0u8) {
Expand Down
Loading

0 comments on commit 600bb87

Please sign in to comment.