Skip to content

Commit

Permalink
FEAT: In dgemm sse2 and fallback, use a 4x4 kernel
Browse files Browse the repository at this point in the history
  • Loading branch information
bluss committed Dec 5, 2018
1 parent fd6294a commit 28e7007
Showing 1 changed file with 6 additions and 6 deletions.
12 changes: 6 additions & 6 deletions src/dgemm_kernel.rs
Original file line number Diff line number Diff line change
Expand Up @@ -92,8 +92,8 @@ impl GemmKernel for KernelAvx {

impl GemmKernel for KernelSse2 {
type Elem = T;
-const MR: usize = MR;
-const NR: usize = NR;
+const MR: usize = 4;
+const NR: usize = 4;

#[inline(always)]
fn align_to() -> usize { 16 }
Expand Down Expand Up @@ -130,8 +130,8 @@ impl GemmKernel for KernelSse2 {

impl GemmKernel for KernelFallback {
type Elem = T;
-const MR: usize = MR;
-const NR: usize = NR;
+const MR: usize = 4;
+const NR: usize = 4;

#[inline(always)]
fn align_to() -> usize { 0 }
Expand Down Expand Up @@ -752,7 +752,7 @@ unsafe fn kernel_fallback_impl(k: usize, alpha: T, a: *const T, b: *const T,

// Compute matrix multiplication into ab[i][j]
unroll_by!(4 => k, {
-loop_m!(i, loop_n!(j, ab[i][j] += at(a, i) * at(b, j)));
+loop4!(i, loop4!(j, ab[i][j] += at(a, i) * at(b, j)));

a = a.offset(MR as isize);
b = b.offset(NR as isize);
Expand All @@ -763,7 +763,7 @@ unsafe fn kernel_fallback_impl(k: usize, alpha: T, a: *const T, b: *const T,
}

// set C = α A B + β C
-loop_n!(j, loop_m!(i, *c![i, j] = alpha * ab[i][j]));
+loop4!(j, loop4!(i, *c![i, j] = alpha * ab[i][j]));
}

#[inline(always)]
Expand Down

0 comments on commit 28e7007

Please sign in to comment.