Skip to content

Commit

Permalink
Improve multithreading (MT) friendliness.
Browse files Browse the repository at this point in the history
Set filter mode to MT_NICE_FILTER on Avisynth+ MT.
Use buffer pool on Avisynth+ MT.
Disable AVX2/FMA3/AVX code when /arch:AVX2 is not set.
Disable AVX2/FMA3/AVX code on Avisynth2.6.
  • Loading branch information
chikuzen committed May 25, 2016
1 parent 4d170a1 commit 335aa37
Show file tree
Hide file tree
Showing 11 changed files with 487 additions and 459 deletions.
6 changes: 4 additions & 2 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -3,16 +3,18 @@ avisynth/src/Debug/*
avisynth/src/x64/*
avisynth/src/.vs/*
avisynth/archive/*
vapoursynth/*
*.opensdf
*.opendb
*.psess
*.sdf
*.suo
*.sln
*.vcxproj.filters
*.vcxproj.user
*.filters
*.user
*.vspx
*.vsp
*.dll
*.avs
*.exe
*.db
File renamed without changes.
14 changes: 8 additions & 6 deletions avisynth/src/TCannyMod.vcxproj
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,7 @@
<LinkIncremental>true</LinkIncremental>
</PropertyGroup>
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
<LinkIncremental>true</LinkIncremental>
<LinkIncremental>false</LinkIncremental>
</PropertyGroup>
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
<LinkIncremental>false</LinkIncremental>
Expand Down Expand Up @@ -103,12 +103,14 @@
<FavorSizeOrSpeed>Speed</FavorSizeOrSpeed>
<OmitFramePointers>true</OmitFramePointers>
<EnableEnhancedInstructionSet>StreamingSIMDExtensions2</EnableEnhancedInstructionSet>
<FloatingPointModel>Precise</FloatingPointModel>
<FloatingPointModel>Fast</FloatingPointModel>
<InlineFunctionExpansion>AnySuitable</InlineFunctionExpansion>
<StringPooling>true</StringPooling>
<Optimization>MaxSpeed</Optimization>
</ClCompile>
<Link>
<TargetMachine>MachineX86</TargetMachine>
<GenerateDebugInformation>Debug</GenerateDebugInformation>
<GenerateDebugInformation>No</GenerateDebugInformation>
<SubSystem>Windows</SubSystem>
<EnableCOMDATFolding>true</EnableCOMDATFolding>
<OptimizeReferences>true</OptimizeReferences>
Expand All @@ -120,14 +122,14 @@
<InlineFunctionExpansion>AnySuitable</InlineFunctionExpansion>
<IntrinsicFunctions>true</IntrinsicFunctions>
<FavorSizeOrSpeed>Speed</FavorSizeOrSpeed>
<EnableEnhancedInstructionSet>AdvancedVectorExtensions</EnableEnhancedInstructionSet>
<EnableEnhancedInstructionSet>NotSet</EnableEnhancedInstructionSet>
<OmitFramePointers>true</OmitFramePointers>
<FloatingPointModel>Fast</FloatingPointModel>
<Optimization>Full</Optimization>
<Optimization>MaxSpeed</Optimization>
<StringPooling>true</StringPooling>
</ClCompile>
<Link>
<GenerateDebugInformation>Debug</GenerateDebugInformation>
<GenerateDebugInformation>No</GenerateDebugInformation>
<LinkTimeCodeGeneration>UseLinkTimeCodeGeneration</LinkTimeCodeGeneration>
</Link>
</ItemDefinitionGroup>
Expand Down
24 changes: 12 additions & 12 deletions avisynth/src/cpu_check.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -71,12 +71,12 @@ static inline void get_cpuid2(int *array, int info_type, int ecx)
#endif
}

// Tests a single bit of `bitfield`.
// Returns `bitfield & (1 << bit)`: zero when the bit is clear, otherwise the
// masked (non-zero) value itself -- the result is NOT normalized to 0/1.
// `bit` is not range-checked here.
// (Diff view: the first signature line is the old one, the second adds noexcept.)
static inline int is_bit_set(int bitfield, int bit)
static inline int is_bit_set(int bitfield, int bit) noexcept
{
return bitfield & (1 << bit);
}

static uint32_t get_simd_support_info(void)
static uint32_t get_simd_support_info(void) noexcept
{
uint32_t ret = 0;
int regs[4] = {0};
Expand Down Expand Up @@ -157,27 +157,27 @@ static uint32_t get_simd_support_info(void)
return ret;
}

// Reports whether the CPU_SSE2_SUPPORT flag is set in the mask returned by
// get_simd_support_info().
// (Diff view: old `int` version using `!!`, followed by the new
// `bool ... noexcept` version using an explicit `!= 0` comparison.)
int has_sse2()
bool has_sse2() noexcept
{
return !!(get_simd_support_info() & CPU_SSE2_SUPPORT);
return (get_simd_support_info() & CPU_SSE2_SUPPORT) != 0;
}

// Reports whether the CPU_SSSE3_SUPPORT flag is set in the mask returned by
// get_simd_support_info().
// (Diff view: old `int` version using `!!`, followed by the new
// `bool ... noexcept` version using an explicit `!= 0` comparison.)
int has_ssse3()
bool has_ssse3() noexcept
{
return !!(get_simd_support_info() & CPU_SSSE3_SUPPORT);
return (get_simd_support_info() & CPU_SSSE3_SUPPORT) != 0;
}

// Reports whether the CPU_SSE4_1_SUPPORT flag is set in the mask returned by
// get_simd_support_info().
// (Diff view: old `int` version using `!!`, followed by the new
// `bool ... noexcept` version using an explicit `!= 0` comparison.)
int has_sse41()
bool has_sse41() noexcept
{
return !!(get_simd_support_info() & CPU_SSE4_1_SUPPORT);
return (get_simd_support_info() & CPU_SSE4_1_SUPPORT) != 0;
}

// Reports whether the CPU_AVX_SUPPORT flag is set in the mask returned by
// get_simd_support_info().
// (Diff view: old `int` version using `!!`, followed by the new
// `bool ... noexcept` version using an explicit `!= 0` comparison.)
int has_avx()
bool has_avx() noexcept
{
return !!(get_simd_support_info() & CPU_AVX_SUPPORT);
return (get_simd_support_info() & CPU_AVX_SUPPORT) != 0;
}

// Reports whether the CPU_AVX2_SUPPORT flag is set in the mask returned by
// get_simd_support_info().
// (Diff view: old `int` version using `!!`, followed by the new
// `bool ... noexcept` version using an explicit `!= 0` comparison.)
int has_avx2()
bool has_avx2() noexcept
{
return !!(get_simd_support_info() & CPU_AVX2_SUPPORT);
return (get_simd_support_info() & CPU_AVX2_SUPPORT) != 0;
}
19 changes: 10 additions & 9 deletions avisynth/src/edge_detection.h
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@
#include "simd.h"


static const float* get_tangent(int idx)
static const float* get_tangent(int idx) noexcept
{
alignas(32) static const float tangent[32] = {
0.414213538169860839843750f, 0.414213538169860839843750f, // tan(pi/8)
Expand Down Expand Up @@ -60,7 +60,7 @@ template <typename Vf, typename Vi, bool CALC_DIR>
static void __stdcall
standard(float* blurp, const size_t blur_pitch, float* emaskp,
const size_t emask_pitch, int32_t* dirp, const size_t dir_pitch,
const size_t width, const size_t height)
const size_t width, const size_t height) noexcept
{

constexpr size_t step = sizeof(Vf) / sizeof(float);
Expand Down Expand Up @@ -110,13 +110,13 @@ standard(float* blurp, const size_t blur_pitch, float* emaskp,
Vi d3 = castps_si(and_ps(cmpge_ps(tan, t1125), cmplt_ps(tan, t1575)));
d3 = srli_i32(d3, 24);
d0 = or_si(or_si(d0, d1), or_si(d2, d3));
stream<Vi>(dirp + x, d0);
stream(dirp + x, d0);
}

Vf magnitude = mul(gx, gx);
magnitude = madd(gy, gy, magnitude);
magnitude = sqrt(magnitude);
stream<Vf>(emaskp + x, magnitude);
stream(emaskp + x, magnitude);
}
emaskp += emask_pitch;
dirp += dir_pitch;
Expand All @@ -138,7 +138,7 @@ template <typename Vf, typename Vi, bool CALC_DIR>
static void __stdcall
sobel(float* blurp, const size_t blur_pitch, float* emaskp,
const size_t emask_pitch, int32_t* dirp, const size_t dir_pitch,
const size_t width, const size_t height)
const size_t width, const size_t height) noexcept
{
constexpr size_t step = sizeof(Vf) / sizeof(float);

Expand Down Expand Up @@ -197,13 +197,13 @@ sobel(float* blurp, const size_t blur_pitch, float* emaskp,
Vi d3 = castps_si(and_ps(cmpge_ps(tan, t1125), cmplt_ps(tan, t1575)));
d3 = srli_i32(d3, 24);
d0 = or_si(or_si(d0, d1), or_si(d2, d3));
stream<Vi>(dirp + x, d0);
stream(dirp + x, d0);
}

Vf magnitude = mul(gx, gx);
magnitude = madd(gy, gy, magnitude);
magnitude = sqrt(magnitude);
stream<Vf>(emaskp + x, magnitude);
stream(emaskp + x, magnitude);
}
emaskp += emask_pitch;
dirp += dir_pitch;
Expand All @@ -218,7 +218,8 @@ template <typename Vf, typename Vi>
static void __stdcall
non_max_suppress(const float* emaskp, const size_t em_pitch,
const int32_t* dirp, const size_t dir_pitch, float* blurp,
const size_t blur_pitch, const size_t width, const size_t height)
const size_t blur_pitch, const size_t width,
const size_t height) noexcept
{
constexpr size_t step = sizeof(Vf) / sizeof(float);

Expand Down Expand Up @@ -275,7 +276,7 @@ non_max_suppress(const float* emaskp, const size_t em_pitch,
void __stdcall
hysteresis(uint8_t* hystp, const size_t hpitch, float* blurp,
const size_t bpitch, const int width, const int height,
const float tmin, const float tmax);
const float tmin, const float tmax) noexcept;

#endif

12 changes: 6 additions & 6 deletions avisynth/src/gaussian_blur.h
Original file line number Diff line number Diff line change
Expand Up @@ -32,14 +32,14 @@
template <typename Vf, arch_t ARCH>
static void __stdcall
convert_to_float(const size_t width, const size_t height, const uint8_t* srcp,
const int src_pitch, float* blurp, const size_t blur_pitch)
const int src_pitch, float* blurp, const size_t blur_pitch) noexcept
{
constexpr size_t step = sizeof(Vf) / sizeof(float);

for (size_t y = 0; y < height; y++) {
for (size_t x = 0; x < width; x += step) {
Vf val = cvtu8_ps<Vf, ARCH>(srcp + x);
stream<Vf>(blurp + x, val);
stream(blurp + x, val);
}
srcp += src_pitch;
blurp += blur_pitch;
Expand All @@ -50,7 +50,7 @@ convert_to_float(const size_t width, const size_t height, const uint8_t* srcp,
template <typename Vf>
static void
horizontal_blur(const float* hkernel, float* buffp, const int radius,
const size_t width, float* blurp)
const size_t width, float* blurp) noexcept
{
constexpr size_t step = sizeof(Vf) / sizeof(float);
const int length = radius * 2 + 1;
Expand All @@ -67,7 +67,7 @@ horizontal_blur(const float* hkernel, float* buffp, const int radius,
Vf val = loadu<Vf>(buffp + x + i);
sum = madd(k, val, sum);
}
stream<Vf>(blurp + x, sum);
stream(blurp + x, sum);
}
}

Expand All @@ -77,7 +77,7 @@ static void __stdcall
gaussian_blur(const int radius, const float* kernel, const float* hkernel,
float* buffp, float* blurp, const size_t blur_pitch,
const uint8_t* srcp, const size_t src_pitch, const size_t width,
const size_t height)
const size_t height) noexcept
{
if (radius == 0) {
convert_to_float<Vf, ARCH>(
Expand Down Expand Up @@ -106,7 +106,7 @@ gaussian_blur(const int radius, const float* kernel, const float* hkernel,

sum = madd(k, input, sum);
}
store<Vf>(buffp + x, sum);
store(buffp + x, sum);
}
horizontal_blur<Vf>(hkernel, buffp, radius, width, blurp);
blurp += blur_pitch;
Expand Down
5 changes: 3 additions & 2 deletions avisynth/src/hysteresis.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ struct Pos {
static __forceinline void
hystfunc(const int32_t x, const int32_t y, float* edge, uint8_t* hyst,
const size_t epitch, const size_t hpitch, const float th,
std::vector<Pos>& stack)
std::vector<Pos>& stack) noexcept
{
if (!hyst[x + y * hpitch] && edge[x + y * epitch] > th) {
edge[x + y * epitch] = FLT_MAX;
Expand All @@ -54,10 +54,11 @@ hystfunc(const int32_t x, const int32_t y, float* edge, uint8_t* hyst,
void __stdcall
hysteresis(uint8_t* hystp, const size_t hpitch, float* blurp,
const size_t bpitch, const int width, const int height,
const float tmin, const float tmax)
const float tmin, const float tmax) noexcept
{
memset(hystp, 0, hpitch * height);
std::vector<Pos> stack;
stack.reserve(512);

for (int32_t y = 0; y < height; ++y) {
for (int32_t x = 0; x < width; ++x) {
Expand Down
Loading

0 comments on commit 335aa37

Please sign in to comment.