Skip to content

Commit

Permalink
Do not zero txa before initializing it
Browse files Browse the repository at this point in the history
  • Loading branch information
rinon committed Jun 29, 2024
1 parent badd73e commit 032977b
Show file tree
Hide file tree
Showing 2 changed files with 103 additions and 21 deletions.
41 changes: 41 additions & 0 deletions src/ctx.rs
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@
use crate::src::disjoint_mut::AsMutPtr;
use crate::src::disjoint_mut::DisjointMut;
use std::iter::zip;
use std::ptr;

/// Perform a `memset` optimized for lengths that are small powers of 2.
///
Expand Down Expand Up @@ -75,6 +76,37 @@ pub fn small_memset<T: Clone + Copy, const UP_TO: usize, const WITH_DEFAULT: boo
}
}

/// Stores `val` into `len` consecutive elements of `buf`, beginning at element
/// `offset`, going through a raw pointer so the destination is never read.
///
/// When `len` is a power of two between 1 and 64 that does not exceed `UP_TO`,
/// the whole run is written as one fixed-size array store. For any other `len`
/// the elements are written one at a time if `WITH_DEFAULT` is `true`;
/// if it is `false`, nothing is written at all.
///
/// # Safety
///
/// `buf` must be correctly aligned and dereferencable (but need not be
/// initialized), and elements `offset..offset + len` must lie inside it.
/// `T` must not have a destructor.
///
/// NOTE(review): this function is not declared `unsafe` although it relies on
/// the contract above; every caller has to uphold it manually.
fn small_memset_raw<T: Clone + Copy, const UP_TO: usize, const WITH_DEFAULT: bool>(
    buf: *mut [T],
    val: T,
    offset: usize,
    len: usize,
) {
    // Precondition, deliberately not checked at runtime:
    // `offset <= buf.len()` and `len <= buf.len() - offset`.
    // SAFETY: `buf` is correctly aligned for type T and offset is within bounds.
    let start = unsafe { (buf as *mut T).add(offset) };
    match len {
        // SAFETY (all fixed-size arms): `[T; N]` has the alignment of `T`, and
        // the caller guarantees that `len == N` elements starting at `start`
        // are in bounds. `write` does not read or drop the old contents.
        1 if UP_TO >= 1 => unsafe { start.cast::<[T; 1]>().write([val; 1]) },
        2 if UP_TO >= 2 => unsafe { start.cast::<[T; 2]>().write([val; 2]) },
        4 if UP_TO >= 4 => unsafe { start.cast::<[T; 4]>().write([val; 4]) },
        8 if UP_TO >= 8 => unsafe { start.cast::<[T; 8]>().write([val; 8]) },
        16 if UP_TO >= 16 => unsafe { start.cast::<[T; 16]>().write([val; 16]) },
        32 if UP_TO >= 32 => unsafe { start.cast::<[T; 32]>().write([val; 32]) },
        64 if UP_TO >= 64 => unsafe { start.cast::<[T; 64]>().write([val; 64]) },
        // Generic fallback: one store per element.
        _ if WITH_DEFAULT => {
            for i in 0..len {
                // SAFETY: `i < len`, so `start + i` is in bounds per the
                // caller's contract.
                unsafe { ptr::write(start.add(i), val) };
            }
        }
        // Without the fallback, unsupported lengths are a no-op.
        _ => {}
    }
}

pub struct CaseSetter<const UP_TO: usize, const WITH_DEFAULT: bool> {
offset: usize,
len: usize,
Expand All @@ -99,6 +131,15 @@ impl<const UP_TO: usize, const WITH_DEFAULT: bool> CaseSetter<UP_TO, WITH_DEFAUL
let mut buf = buf.index_mut(self.offset..self.offset + self.len);
small_memset::<V, UP_TO, WITH_DEFAULT>(&mut *buf, val);
}

/// Raw-pointer setter: forwards `buf` and `val` to `small_memset_raw`
/// together with this setter's `offset` and `len`, so the destination may be
/// memory that has not been initialized yet.
///
/// # Safety
///
/// `buf` must be correctly aligned and dereferencable (but need not be
/// initialized). The `len` elements starting at `offset` are written through
/// it, so that range must lie inside `buf`.
#[inline]
pub unsafe fn set_raw<T: Clone + Copy>(&self, buf: *mut [T], val: T) {
    let (offset, len) = (self.offset, self.len);
    small_memset_raw::<T, UP_TO, WITH_DEFAULT>(buf, val, offset, len);
}
}

/// The entrypoint to the [`CaseSet`] API.
Expand Down
83 changes: 62 additions & 21 deletions src/lf_mask.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,8 @@ use libc::ptrdiff_t;
use parking_lot::RwLock;
use std::cmp;
use std::ffi::c_int;
use std::mem::MaybeUninit;
use std::ptr;

#[repr(C)]
pub struct Av1FilterLUT {
Expand Down Expand Up @@ -92,8 +94,14 @@ pub struct Av1Restoration {
/// but in Rust, dereferencing such a pointer would be an out-of-bounds access, and thus UB.
/// Instead of offsetting `txa`, the offsets are calculated from
/// the existing `y_off` and `x_off` args and applied at each use site of `txa`.
///
/// Initializes:
/// * `txa[0][0][y][x]` for all `y` and `x` in the range of the current block
/// * `txa[1][0][y][x]` for all `y` and `x` in the range of the current block
/// * `txa[0][1][y][x_off * t_dim.w]` for all `y` in the range of the current block
/// * `txa[1][1][y_off * t_dim.h][x]` for all `x` in the range of the current block
fn decomp_tx(
txa: &mut [[[[u8; 32]; 32]; 2]; 2],
txa: &mut [[[[MaybeUninit<u8>; 32]; 32]; 2]; 2],
from: TxfmSize,
depth: usize,
y_off: u8,
Expand Down Expand Up @@ -128,15 +136,18 @@ fn decomp_tx(
let lw = cmp::min(2, t_dim.lw);
let lh = cmp::min(2, t_dim.lh);

debug_assert!(t_dim.w == 1 << t_dim.lw && t_dim.w <= 16);
CaseSet::<16, false>::one((), t_dim.w as usize, x0, |case, ()| {
for y in 0..t_dim.h as usize {
case.set(&mut txa[0][0][y0 + y], lw);
case.set(&mut txa[1][0][y0 + y], lh);
txa[0][1][y0 + y][x0] = t_dim.w;
unsafe {
case.set_raw(ptr::from_mut(&mut txa[0][0][y0 + y]) as *mut [u8; 32], lw);
case.set_raw(ptr::from_mut(&mut txa[1][0][y0 + y]) as *mut [u8; 32], lh);
txa[0][1][y0 + y][x0].write(t_dim.w);
}
}
});
CaseSet::<16, false>::one((), t_dim.w as usize, x0, |case, ()| {
case.set(&mut txa[1][1][y0], t_dim.h);
CaseSet::<16, false>::one((), t_dim.w as usize, x0, |case, ()| unsafe {
case.set_raw(ptr::from_mut(&mut txa[1][1][y0]) as *mut [u8; 32], t_dim.h);
});
};
}
Expand All @@ -157,45 +168,63 @@ fn mask_edges_inter(
let t_dim = &dav1d_txfm_dimensions[max_tx as usize];

// See [`decomp_tx`]'s docs for the `txa` arg.
let mut txa = Align16([[[[0; 32]; 32]; 2]; 2]);

// SAFETY: We are building an array of uninitialized MaybeUninit values,
// which do not require initialization, so this `assume_init` is safe
let mut txa: Align16<[[[[MaybeUninit<u8>; 32]; 32]; 2]; 2]> =
unsafe { MaybeUninit::uninit().assume_init() };

for (y_off, _) in (0..h4).step_by(t_dim.h as usize).enumerate() {
for (x_off, _) in (0..w4).step_by(t_dim.w as usize).enumerate() {
decomp_tx(&mut txa.0, max_tx, 0, y_off as u8, x_off as u8, tx_masks);
}
}

// After these calls to `decomp_tx`, the following elements of `txa` are initialized:
// * `txa[0][0][0..h4][0..w4]`
// * `txa[1][0][0..h4][0..w4]`
// * `txa[0][1][0..h4][x]` where `x` is the start of a block edge
// * `txa[1][1][y][0..w4]` where `y` is the start of a block edge

// left block edge
for y in 0..h4 {
let mask = 1u32 << (by4 + y);
let sidx = (mask >= 0x10000) as usize;
let smask = mask >> (sidx << 4);
masks[0][bx4][cmp::min(txa[0][0][y][0], l[y]) as usize][sidx]
.update(|it| it | smask as u16);
// SAFETY: y < h4 so txa[0][0][y][0] is initialized.
let txa_y = unsafe { txa[0][0][y][0].assume_init() };
masks[0][bx4][cmp::min(txa_y, l[y]) as usize][sidx].update(|it| it | smask as u16);
}

// top block edge
for x in 0..w4 {
let mask = 1u32 << (bx4 + x);
let sidx = (mask >= 0x10000) as usize;
let smask = mask >> (sidx << 4);
masks[1][by4][cmp::min(txa[1][0][0][x], a[x]) as usize][sidx]
.update(|it| it | smask as u16);
// SAFETY: x < w4 so txa[1][0][0][x] is initialized.
let txa_x = unsafe { txa[1][0][0][x].assume_init() };
masks[1][by4][cmp::min(txa_x, a[x]) as usize][sidx].update(|it| it | smask as u16);
}
if !skip {
// inner (tx) left|right edges
for y in 0..h4 {
let mask = 1u32 << (by4 + y);
let sidx = (mask >= 0x10000) as usize;
let smask = mask >> (sidx << 4);
let mut ltx = txa[0][0][y][0];
let step = txa[0][1][y][0] as usize;
// SAFETY: y < h4 so txa[0][0][y][0] is initialized.
let mut ltx = unsafe { txa[0][0][y][0].assume_init() };
// SAFETY: y < h4 and x == 0 so txa[0][1][y][0] is initialized.
let step = unsafe { txa[0][1][y][0].assume_init() } as usize;
let mut x = step;
while x < w4 {
let rtx = txa[0][0][y][x];
// SAFETY: x < w4 and y < h4 so txa[0][0][y][x] is initialized.
let rtx = unsafe { txa[0][0][y][x].assume_init() };
masks[0][bx4 + x][cmp::min(rtx, ltx) as usize][sidx].update(|it| it | smask as u16);
ltx = rtx;
let step = txa[0][1][y][x] as usize;
// SAFETY: x is incremented by tdim.w from previously
// initialized element, so we know that this element is a block
// edge and also initialized.
let step = unsafe { txa[0][1][y][x].assume_init() } as usize;
x += step;
}
}
Expand All @@ -207,23 +236,35 @@ fn mask_edges_inter(
let mask = 1u32 << (bx4 + x);
let sidx = (mask >= 0x10000) as usize;
let smask = mask >> (sidx << 4);
let mut ttx = txa[1][0][0][x];
let step = txa[1][1][0][x] as usize;
// SAFETY: x < w4 so txa[1][0][0][x] is initialized.
let mut ttx = unsafe { txa[1][0][0][x].assume_init() };
// SAFETY: x < w4 and y == 0 so txa[1][1][0][x] is initialized.
let step = unsafe { txa[1][1][0][x].assume_init() } as usize;
let mut y = step;
while y < h4 {
let btx = txa[1][0][y][x];
// SAFETY: x < w4 and y < h4 so txa[1][0][y][x] is initialized.
let btx = unsafe { txa[1][0][y][x].assume_init() };
masks[1][by4 + y][cmp::min(ttx, btx) as usize][sidx].update(|it| it | smask as u16);
ttx = btx;
let step = txa[1][1][y][x] as usize;
// SAFETY: y is incremented by tdim.h from previously
// initialized element, so we know that this element is a block
// edge and also initialized.
let step = unsafe { txa[1][1][y][x].assume_init() } as usize;
y += step;
}
}
}

for (l, txa) in l[..h4].iter_mut().zip(&txa[0][0][..h4]) {
*l = txa[w4 - 1];
// SAFETY: y < h4 and x < w4 so txa[0][0][y][x] is initialized.
*l = unsafe { txa[w4 - 1].assume_init() };
}
a[..w4].copy_from_slice(&txa[1][0][h4 - 1][..w4]);
// SAFETY: y < h4 and x < w4 so txa[1][0][y][x] is initialized. Note that
// this can be replaced by `MaybeUninit::slice_assume_init_ref` if it is
// stabilized.
let txa_slice =
unsafe { &*(&txa[1][0][h4 - 1][..w4] as *const [MaybeUninit<u8>] as *const [u8]) };
a[..w4].copy_from_slice(txa_slice);
}

#[inline]
Expand Down

0 comments on commit 032977b

Please sign in to comment.