Skip to content

Commit

Permalink
Correct build issue with gcc and neon version
Browse files Browse the repository at this point in the history
  • Loading branch information
christophe0606 committed Jan 22, 2025
1 parent 662f0b4 commit f6ba3eb
Show file tree
Hide file tree
Showing 33 changed files with 155 additions and 140 deletions.
89 changes: 49 additions & 40 deletions Include/dsp/matrix_utils.h
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ extern "C"
\
for(_w=0;_w < nb; _w++) \
{ \
*data *= CAST v; \
*data = CAST *data * CAST v; \
data += _numCols; \
} \
}
Expand Down Expand Up @@ -178,54 +178,63 @@ extern "C"
} \
}

#define SCALE_ROW_F16(A,COL,v,i) \
{ \
#define SCALE_ROW_F16(A,COL,v,i) \
{ \
int32_t _w; \
float16_t *data = (A)->pData; \
float16_t *data = (A)->pData; \
const int32_t _numCols = (A)->numCols;\
const int32_t nb = _numCols-(COL); \
\
\
data += i*_numCols + (COL); \
\
for(_w=0;_w < nb; _w++) \
{ \
*data++ *= (_Float16)v; \
} \
\
_Float16 sum; \
for(_w=0;_w < nb; _w++) \
{ \
sum = *data; \
sum *= (_Float16)v; \
*data++ = sum; \
} \
}


#define MAC_ROW_F16(COL,A,i,v,B,j) \
{ \
int32_t _w; \
float16_t *dataA = (A)->pData; \
float16_t *dataB = (B)->pData; \
const int32_t _numCols = (A)->numCols; \
const int32_t nb = _numCols-(COL); \
\
dataA += i*_numCols + (COL); \
dataB += j*_numCols + (COL); \
\
for(_w=0;_w < nb; _w++) \
{ \
*dataA++ += (_Float16)v * (_Float16)*dataB++;\
} \
#define MAC_ROW_F16(COL,A,i,v,B,j) \
{ \
int32_t _w; \
float16_t *dataA = (A)->pData; \
float16_t *dataB = (B)->pData; \
const int32_t _numCols = (A)->numCols; \
const int32_t nb = _numCols-(COL); \
\
dataA += i*_numCols + (COL); \
dataB += j*_numCols + (COL); \
\
_Float16 sum ; \
for(_w=0;_w < nb; _w++) \
{ \
sum = *dataA; \
sum += (_Float16)v * (_Float16)*dataB++;\
*dataA++ = sum; \
} \
}

#define MAS_ROW_F16(COL,A,i,v,B,j) \
{ \
int32_t _w; \
float16_t *dataA = (A)->pData; \
float16_t *dataB = (B)->pData; \
const int32_t _numCols = (A)->numCols; \
const int32_t nb = _numCols-(COL); \
\
dataA += i*_numCols + (COL); \
dataB += j*_numCols + (COL); \
\
for(_w=0;_w < nb; _w++) \
{ \
*dataA++ -= (_Float16)v * (_Float16)*dataB++;\
} \
#define MAS_ROW_F16(COL,A,i,v,B,j) \
{ \
int32_t _w; \
float16_t *dataA = (A)->pData; \
float16_t *dataB = (B)->pData; \
const int32_t _numCols = (A)->numCols; \
const int32_t nb = _numCols-(COL); \
\
dataA += i*_numCols + (COL); \
dataB += j*_numCols + (COL); \
\
_Float16 sum ; \
for(_w=0;_w < nb; _w++) \
{ \
sum = *dataA; \
sum -= (_Float16)v * (_Float16)*dataB++;\
*dataA++ = sum; \
} \
}

#endif /*defined(ARM_MATH_MVE_FLOAT16) && !defined(ARM_MATH_AUTOVECTORIZE)*/
Expand Down
12 changes: 6 additions & 6 deletions Ne10/CMSIS_NE10_fft.neonintrinsic.h
Original file line number Diff line number Diff line change
Expand Up @@ -110,17 +110,17 @@

#define VDUPQ_N_F32(VAR) { VAR, VAR, VAR, VAR }

#define CONST_TW_81 0.70710678
#define CONST_TW_81N -0.70710678
#define CONST_TW_81 0.70710678f
#define CONST_TW_81N -0.70710678f

const static float32x4_t Q_TW_81 = VDUPQ_N_F32(CONST_TW_81 );
const static float32x4_t Q_TW_81N = VDUPQ_N_F32(CONST_TW_81N);
static const float32x4_t Q_TW_81 = VDUPQ_N_F32(CONST_TW_81 );
static const float32x4_t Q_TW_81N = VDUPQ_N_F32(CONST_TW_81N);

#define DIV_TW81 1.4142136f
#define DIV_TW81N -1.4142136f

const static float32x4_t DIV_TW81_NEON = VDUPQ_N_F32(DIV_TW81);
const static float32x4_t DIV_TW81N_NEON = VDUPQ_N_F32(DIV_TW81N);
static const float32x4_t DIV_TW81_NEON = VDUPQ_N_F32(DIV_TW81);
static const float32x4_t DIV_TW81N_NEON = VDUPQ_N_F32(DIV_TW81N);

#define NE10_RADIX8x4_R2C_NEON_KERNEL_S1(Q_OUT,Q_IN) do { \
Q_OUT ## 0 = vaddq_f32 (Q_IN ## 0, Q_IN ## 4); \
Expand Down
8 changes: 4 additions & 4 deletions Ne10/CMSIS_NE10_fft.neonintrinsic_f16.h
Original file line number Diff line number Diff line change
Expand Up @@ -119,14 +119,14 @@
#define CONST_TW_81 0.70710678f16
#define CONST_TW_81N -0.70710678f16

const static float16x4_t Q_TW_81 = VDUPQ_N_F16(CONST_TW_81 );
const static float16x4_t Q_TW_81N = VDUPQ_N_F16(CONST_TW_81N);
static const float16x4_t Q_TW_81 = VDUPQ_N_F16(CONST_TW_81 );
static const float16x4_t Q_TW_81N = VDUPQ_N_F16(CONST_TW_81N);

#define DIV_TW81 1.4142136f16
#define DIV_TW81N -1.4142136f16

const static float16x4_t DIV_TW81_NEON = VDUPQ_N_F16(DIV_TW81);
const static float16x4_t DIV_TW81N_NEON = VDUPQ_N_F16(DIV_TW81N);
static const float16x4_t DIV_TW81_NEON = VDUPQ_N_F16(DIV_TW81);
static const float16x4_t DIV_TW81N_NEON = VDUPQ_N_F16(DIV_TW81N);

#define NE10_RADIX8x4_R2C_NEON_KERNEL_S1(Q_OUT,Q_IN) do { \
Q_OUT ## 0 = vadd_f16 (Q_IN ## 0, Q_IN ## 4); \
Expand Down
40 changes: 20 additions & 20 deletions Ne10/CMSIS_NE10_fft_common_variables.h
Original file line number Diff line number Diff line change
Expand Up @@ -40,60 +40,60 @@
///////////////////////////

/* Twiddles used in Radix-8 FFT */
const static ne10_float32_t TW_81_F32 = 0.70710678; // sqrt (2) / 2
const static ne10_float32_t TW_81N_F32 = -0.70710678; // - TW_81_F32
static const ne10_float32_t TW_81_F32 = 0.70710678f; // sqrt (2) / 2
static const ne10_float32_t TW_81N_F32 = -0.70710678f; // - TW_81_F32

/* Twiddles used in Radix-5 FFT */
const static ne10_fft_cpx_float32_t TW_5A_F32 =
static const ne10_fft_cpx_float32_t TW_5A_F32 =
{
0.309016994374947, // cos (2 * pi / 5)
-0.951056516295154 // - sin (2 * pi / 5)
0.309016994374947f, // cos (2 * pi / 5)
-0.951056516295154f // - sin (2 * pi / 5)
};
const static ne10_fft_cpx_int32_t TW_5A_S32 =
static const ne10_fft_cpx_int32_t TW_5A_S32 =
{
663608942, // round (TW_5A_F32.r * 2^31)
-2042378317 // round (TW_5A_F32.i * 2^31)
};

const static ne10_fft_cpx_float32_t TW_5B_F32 =
static const ne10_fft_cpx_float32_t TW_5B_F32 =
{
-0.809016994374947, // cos (4 * pi / 5)
-0.587785252292473 // - sin (4 * pi / 5)
-0.809016994374947f, // cos (4 * pi / 5)
-0.587785252292473f // - sin (4 * pi / 5)
};
const static ne10_fft_cpx_int32_t TW_5B_S32 =
static const ne10_fft_cpx_int32_t TW_5B_S32 =
{
-1737350766, // round (TW_5B_F32.r * 2^31)
-1262259218 // round (TW_5B_F32.i * 2^31)
};

/* Twiddles used in Radix-3 FFT */
const static ne10_float32_t TW_3I_F32 = 0.866025403784439; // sqrt (3) / 2
const static ne10_float32_t TW_3IN_F32 = - 0.866025403784439; // - TW_3I_F32
const static ne10_int32_t TW_3I_S32 = 1859775393; // round (TW_3I_F32 * 2^31)
const static ne10_int32_t TW_3IN_S32 = -1859775393; // round (TW_3IN_F32 * 2^31)
static const ne10_float32_t TW_3I_F32 = 0.866025403784439f; // sqrt (3) / 2
static const ne10_float32_t TW_3IN_F32 = - 0.866025403784439f; // - TW_3I_F32
static const ne10_int32_t TW_3I_S32 = 1859775393; // round (TW_3I_F32 * 2^31)
static const ne10_int32_t TW_3IN_S32 = -1859775393; // round (TW_3IN_F32 * 2^31)

#if defined(ARM_MATH_NEON_FLOAT16) && defined(ARM_FLOAT16_SUPPORTED)

/* Twiddles used in Radix-8 FFT */
const static ne10_float16_t TW_81_F16 = 0.70710678f16; // sqrt (2) / 2
const static ne10_float16_t TW_81N_F16 = -0.70710678f16; // - TW_81_F16
static const ne10_float16_t TW_81_F16 = 0.70710678f16; // sqrt (2) / 2
static const ne10_float16_t TW_81N_F16 = -0.70710678f16; // - TW_81_F16

/* Twiddles used in Radix-5 FFT */
const static ne10_fft_cpx_float16_t TW_5A_F16 =
static const ne10_fft_cpx_float16_t TW_5A_F16 =
{
0.309016994374947f16, // cos (2 * pi / 5)
-0.951056516295154f16 // - sin (2 * pi / 5)
};

const static ne10_fft_cpx_float16_t TW_5B_F16 =
static const ne10_fft_cpx_float16_t TW_5B_F16 =
{
-0.809016994374947f16, // cos (4 * pi / 5)
-0.587785252292473f16 // - sin (4 * pi / 5)
};

/* Twiddles used in Radix-3 FFT */
const static ne10_float16_t TW_3I_F16 = 0.866025403784439f16; // sqrt (3) / 2
const static ne10_float16_t TW_3IN_F16 = - 0.866025403784439f16; // - TW_3I_F16
static const ne10_float16_t TW_3I_F16 = 0.866025403784439f16; // sqrt (3) / 2
static const ne10_float16_t TW_3IN_F16 = - 0.866025403784439f16; // - TW_3I_F16
#endif

#endif // NE10_FFT_COMMON_VARIBLES_H
20 changes: 10 additions & 10 deletions Ne10/CMSIS_NE10_fft_generic_float16.neonintrisic.c
Original file line number Diff line number Diff line change
Expand Up @@ -345,12 +345,12 @@ static inline void NE10_FFT8_FUC_NEON_F16 (CPLX out[8],
const CPLX in[8])
{
CPLX s[8];
const static ne10_fft_cpx_float16_t TW_8[4] =
static const ne10_fft_cpx_float16_t TW_8[4] =
{
{ 1.00000, 0.00000 },
{ 0.70711, -0.70711 },
{ 0.00000, -1.00000 },
{ -0.70711, -0.70711 },
{ 1.00000f16, 0.00000f16 },
{ 0.70711f16, -0.70711f16 },
{ 0.00000f16, -1.00000f16 },
{ -0.70711f16, -0.70711f16 },
};

// STAGE - 1
Expand Down Expand Up @@ -430,7 +430,7 @@ static void ne10_radix_2_butterfly_float16_neon_##ISFIRSTSTAGE##_##ISINVERSE##_#
ne10_int32_t f_count; \
ne10_int32_t m_count; \
\
const REAL one_by_fft_neon = NE10_REAL_DUP_NEON_F16 (0.25 / nfft); \
const REAL one_by_fft_neon = NE10_REAL_DUP_NEON_F16 (0.25f16 / nfft); \
\
for (f_count = fstride; f_count > 0; f_count--) \
{ \
Expand Down Expand Up @@ -509,7 +509,7 @@ static void ne10_radix_4_butterfly_float16_neon_##ISFIRSTSTAGE##_##ISINVERSE##_#
ne10_int32_t f_count; \
ne10_int32_t m_count; \
\
const REAL one_by_fft_neon = NE10_REAL_DUP_NEON_F16 (0.25 / nfft); \
const REAL one_by_fft_neon = NE10_REAL_DUP_NEON_F16 (0.25f16 / nfft); \
\
for (f_count = fstride; f_count > 0; f_count--) \
{ \
Expand Down Expand Up @@ -593,7 +593,7 @@ static void ne10_radix_3_butterfly_float16_neon_##ISFIRSTSTAGE##_##ISINVERSE##_#
ne10_int32_t f_count; \
ne10_int32_t m_count; \
\
const REAL one_by_fft_neon = NE10_REAL_DUP_NEON_F16 (0.25 / nfft); \
const REAL one_by_fft_neon = NE10_REAL_DUP_NEON_F16 (0.25f16 / nfft); \
const float16x4_t TW_3IN_NEON_F16 = vdup_n_f16 (TW_3IN_F16); \
const float16x4_t HALF_NEON_F16 = vdup_n_f16 (0.5f16); \
\
Expand Down Expand Up @@ -693,7 +693,7 @@ static void ne10_radix_5_butterfly_float16_neon_##ISFIRSTSTAGE##_##ISINVERSE##_#
ne10_int32_t f_count; \
ne10_int32_t m_count; \
\
const REAL one_by_fft_neon = NE10_REAL_DUP_NEON_F16 (0.25 / nfft); \
const REAL one_by_fft_neon = NE10_REAL_DUP_NEON_F16 (0.25f16 / nfft); \
\
for (f_count = fstride; f_count > 0; f_count--) \
{ \
Expand Down Expand Up @@ -815,7 +815,7 @@ static void ne10_radix_8_butterfly_float16_neon_##ISFIRSTSTAGE##_##ISINVERSE##_#
ne10_int32_t f_count; \
ne10_int32_t m_count; \
\
const REAL one_by_fft_neon = NE10_REAL_DUP_NEON_F16 (0.25 / nfft); \
const REAL one_by_fft_neon = NE10_REAL_DUP_NEON_F16 (0.25f16 / nfft); \
\
for (f_count = fstride; f_count > 0; f_count--) \
{ \
Expand Down
20 changes: 10 additions & 10 deletions Ne10/CMSIS_NE10_fft_generic_float32.neonintrisic.c
Original file line number Diff line number Diff line change
Expand Up @@ -429,12 +429,12 @@ static inline void NE10_FFT8_FUC_NEON_F32 (CPLX out[8],
const CPLX in[8])
{
CPLX s[8];
const static ne10_fft_cpx_float32_t TW_8[4] =
static const ne10_fft_cpx_float32_t TW_8[4] =
{
{ 1.00000, 0.00000 },
{ 0.70711, -0.70711 },
{ 0.00000, -1.00000 },
{ -0.70711, -0.70711 },
{ 1.00000f, 0.00000f },
{ 0.70711f, -0.70711f },
{ 0.00000f, -1.00000f },
{ -0.70711f, -0.70711f },
};

// STAGE - 1
Expand Down Expand Up @@ -514,7 +514,7 @@ static void ne10_radix_2_butterfly_float32_neon_##ISFIRSTSTAGE##_##ISINVERSE##_#
ne10_int32_t f_count; \
ne10_int32_t m_count; \
\
const REAL one_by_fft_neon = NE10_REAL_DUP_NEON_F32 (0.25 / nfft); \
const REAL one_by_fft_neon = NE10_REAL_DUP_NEON_F32 (0.25f / nfft); \
\
for (f_count = fstride; f_count > 0; f_count--) \
{ \
Expand Down Expand Up @@ -593,7 +593,7 @@ static void ne10_radix_4_butterfly_float32_neon_##ISFIRSTSTAGE##_##ISINVERSE##_#
ne10_int32_t f_count; \
ne10_int32_t m_count; \
\
const REAL one_by_fft_neon = NE10_REAL_DUP_NEON_F32 (0.25 / nfft); \
const REAL one_by_fft_neon = NE10_REAL_DUP_NEON_F32 (0.25f / nfft); \
\
for (f_count = fstride; f_count > 0; f_count--) \
{ \
Expand Down Expand Up @@ -677,7 +677,7 @@ static void ne10_radix_3_butterfly_float32_neon_##ISFIRSTSTAGE##_##ISINVERSE##_#
ne10_int32_t f_count; \
ne10_int32_t m_count; \
\
const REAL one_by_fft_neon = NE10_REAL_DUP_NEON_F32 (0.25 / nfft); \
const REAL one_by_fft_neon = NE10_REAL_DUP_NEON_F32 (0.25f / nfft); \
const float32x4_t TW_3IN_NEON_F32 = vdupq_n_f32 (TW_3IN_F32); \
const float32x4_t HALF_NEON_F32 = vdupq_n_f32 (0.5f); \
\
Expand Down Expand Up @@ -777,7 +777,7 @@ static void ne10_radix_5_butterfly_float32_neon_##ISFIRSTSTAGE##_##ISINVERSE##_#
ne10_int32_t f_count; \
ne10_int32_t m_count; \
\
const REAL one_by_fft_neon = NE10_REAL_DUP_NEON_F32 (0.25 / nfft); \
const REAL one_by_fft_neon = NE10_REAL_DUP_NEON_F32 (0.25f / nfft); \
\
for (f_count = fstride; f_count > 0; f_count--) \
{ \
Expand Down Expand Up @@ -899,7 +899,7 @@ static void ne10_radix_8_butterfly_float32_neon_##ISFIRSTSTAGE##_##ISINVERSE##_#
ne10_int32_t f_count; \
ne10_int32_t m_count; \
\
const REAL one_by_fft_neon = NE10_REAL_DUP_NEON_F32 (0.25 / nfft); \
const REAL one_by_fft_neon = NE10_REAL_DUP_NEON_F32 (0.25f / nfft); \
\
for (f_count = fstride; f_count > 0; f_count--) \
{ \
Expand Down
8 changes: 4 additions & 4 deletions Ne10/NE10_fft_float32.neonintrinsic.c
Original file line number Diff line number Diff line change
Expand Up @@ -422,8 +422,8 @@ __STATIC_INLINE void ne10_radix8x4_neon (ne10_fft_cpx_float32_t *out,
ne10_int32_t src_step = stride << 1; // ne10_fft_cpx_float32_t -> float32_t offset
const float32_t *p_src = (const float32_t *) in;
float32_t *p_dst = (float32_t *) out;
const ne10_float32_t TW_81 = 0.70710678;
const ne10_float32_t TW_81N = -0.70710678;
const ne10_float32_t TW_81 = 0.70710678f;
const ne10_float32_t TW_81N = -0.70710678f;

CMPLX_VEC_F32 q2_in0, q2_in1, q2_in2, q2_in3, q2_in4, q2_in5, q2_in6, q2_in7;
float32x4_t q_sin0_r, q_sin0_i, q_sin1_r, q_sin1_i, q_sin2_r, q_sin2_i, q_sin3_r, q_sin3_i;
Expand Down Expand Up @@ -759,8 +759,8 @@ __STATIC_INLINE void ne10_radix8x4_inverse_neon (ne10_fft_cpx_float32_t *out,
ne10_int32_t src_step = stride << 1;
const float32_t *p_src = (const float32_t *) in;
float32_t *p_dst = (float32_t *) out;
const ne10_float32_t TW_81 = 0.70710678;
const ne10_float32_t TW_81N = -0.70710678;
const ne10_float32_t TW_81 = 0.70710678f;
const ne10_float32_t TW_81N = -0.70710678f;

CMPLX_VEC_F32 q2_in0, q2_in1, q2_in2, q2_in3, q2_in4, q2_in5, q2_in6, q2_in7;
float32x4_t q_sin0_r, q_sin0_i, q_sin1_r, q_sin1_i, q_sin2_r, q_sin2_i, q_sin3_r, q_sin3_i;
Expand Down
Loading

0 comments on commit f6ba3eb

Please sign in to comment.