Improve multithreading (MT) friendliness.

Set the filter mode to MT_NICE_FILTER on Avisynth+ MT. Use the buffer pool on Avisynth+ MT. Disable the AVX2/FMA3/AVX code paths when /arch:AVX2 is not set. Disable the AVX2/FMA3/AVX code paths on Avisynth 2.6.
chikuzen · May 25, 2016 · 335aa37 · 335aa37
1 parent 4d170a1
commit 335aa37
Show file tree

Hide file tree

Showing 11 changed files with 487 additions and 459 deletions.
diff --git a/.gitignore b/.gitignore
@@ -3,16 +3,18 @@ avisynth/src/Debug/*
 avisynth/src/x64/*
 avisynth/src/.vs/*
 avisynth/archive/*
+vapoursynth/*
 *.opensdf
 *.opendb
 *.psess
 *.sdf
 *.suo
 *.sln
-*.vcxproj.filters
-*.vcxproj.user
+*.filters
+*.user
 *.vspx
 *.vsp
 *.dll
 *.avs
 *.exe
+*.db
diff --git a/avisynth/LISENCE.GPLv2 → avisynth/LICENSE.GPLv2 b/avisynth/LISENCE.GPLv2 → avisynth/LICENSE.GPLv2
diff --git a/avisynth/src/TCannyMod.vcxproj b/avisynth/src/TCannyMod.vcxproj
@@ -71,7 +71,7 @@
     <LinkIncremental>true</LinkIncremental>
   </PropertyGroup>
   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
-    <LinkIncremental>true</LinkIncremental>
+    <LinkIncremental>false</LinkIncremental>
   </PropertyGroup>
   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
     <LinkIncremental>false</LinkIncremental>
@@ -103,12 +103,14 @@
       <FavorSizeOrSpeed>Speed</FavorSizeOrSpeed>
       <OmitFramePointers>true</OmitFramePointers>
       <EnableEnhancedInstructionSet>StreamingSIMDExtensions2</EnableEnhancedInstructionSet>
-      <FloatingPointModel>Precise</FloatingPointModel>
+      <FloatingPointModel>Fast</FloatingPointModel>
       <InlineFunctionExpansion>AnySuitable</InlineFunctionExpansion>
+      <StringPooling>true</StringPooling>
+      <Optimization>MaxSpeed</Optimization>
     </ClCompile>
     <Link>
       <TargetMachine>MachineX86</TargetMachine>
-      <GenerateDebugInformation>Debug</GenerateDebugInformation>
+      <GenerateDebugInformation>No</GenerateDebugInformation>
       <SubSystem>Windows</SubSystem>
       <EnableCOMDATFolding>true</EnableCOMDATFolding>
       <OptimizeReferences>true</OptimizeReferences>
@@ -120,14 +122,14 @@
       <InlineFunctionExpansion>AnySuitable</InlineFunctionExpansion>
       <IntrinsicFunctions>true</IntrinsicFunctions>
       <FavorSizeOrSpeed>Speed</FavorSizeOrSpeed>
-      <EnableEnhancedInstructionSet>AdvancedVectorExtensions</EnableEnhancedInstructionSet>
+      <EnableEnhancedInstructionSet>NotSet</EnableEnhancedInstructionSet>
       <OmitFramePointers>true</OmitFramePointers>
       <FloatingPointModel>Fast</FloatingPointModel>
-      <Optimization>Full</Optimization>
+      <Optimization>MaxSpeed</Optimization>
       <StringPooling>true</StringPooling>
     </ClCompile>
     <Link>
-      <GenerateDebugInformation>Debug</GenerateDebugInformation>
+      <GenerateDebugInformation>No</GenerateDebugInformation>
       <LinkTimeCodeGeneration>UseLinkTimeCodeGeneration</LinkTimeCodeGeneration>
     </Link>
   </ItemDefinitionGroup>

diff --git a/avisynth/src/cpu_check.cpp b/avisynth/src/cpu_check.cpp
@@ -71,12 +71,12 @@ static inline void get_cpuid2(int *array, int info_type, int ecx)
 #endif
 }
 
-static inline int is_bit_set(int bitfield, int bit)
+static inline int is_bit_set(int bitfield, int bit)  noexcept
 {
     return bitfield & (1 << bit);
 }
 
-static uint32_t get_simd_support_info(void)
+static uint32_t get_simd_support_info(void) noexcept
 {
     uint32_t ret = 0;
     int regs[4] = {0};
@@ -157,27 +157,27 @@ static uint32_t get_simd_support_info(void)
     return ret;
 }
 
-int has_sse2()
+bool has_sse2() noexcept
 {
-    return !!(get_simd_support_info() & CPU_SSE2_SUPPORT);
+    return (get_simd_support_info() & CPU_SSE2_SUPPORT) != 0;
 }
 
-int has_ssse3()
+bool has_ssse3() noexcept
 {
-    return !!(get_simd_support_info() & CPU_SSSE3_SUPPORT);
+    return (get_simd_support_info() & CPU_SSSE3_SUPPORT) != 0;
 }
 
-int has_sse41()
+bool has_sse41() noexcept
 {
-    return !!(get_simd_support_info() & CPU_SSE4_1_SUPPORT);
+    return (get_simd_support_info() & CPU_SSE4_1_SUPPORT) != 0;
 }
 
-int has_avx()
+bool has_avx() noexcept
 {
-    return !!(get_simd_support_info() & CPU_AVX_SUPPORT);
+    return (get_simd_support_info() & CPU_AVX_SUPPORT) != 0;
 }
 
-int has_avx2()
+bool has_avx2() noexcept
 {
-    return !!(get_simd_support_info() & CPU_AVX2_SUPPORT);
+    return (get_simd_support_info() & CPU_AVX2_SUPPORT) != 0;
 }
diff --git a/avisynth/src/edge_detection.h b/avisynth/src/edge_detection.h
@@ -31,7 +31,7 @@
 #include "simd.h"
 
 
-static const float* get_tangent(int idx)
+static const float* get_tangent(int idx) noexcept
 {
      alignas(32) static const float tangent[32] = {
         0.414213538169860839843750f, 0.414213538169860839843750f, // tan(pi/8)
@@ -60,7 +60,7 @@ template <typename Vf, typename Vi, bool CALC_DIR>
 static void __stdcall
 standard(float* blurp, const size_t blur_pitch, float* emaskp,
          const size_t emask_pitch, int32_t* dirp, const size_t dir_pitch,
-         const size_t width, const size_t height)
+         const size_t width, const size_t height) noexcept
 {
 
     constexpr size_t step = sizeof(Vf) / sizeof(float);
@@ -110,13 +110,13 @@ standard(float* blurp, const size_t blur_pitch, float* emaskp,
                 Vi d3 = castps_si(and_ps(cmpge_ps(tan, t1125), cmplt_ps(tan, t1575)));
                 d3 = srli_i32(d3, 24);
                 d0 = or_si(or_si(d0, d1), or_si(d2, d3));
-                stream<Vi>(dirp + x, d0);
+                stream(dirp + x, d0);
             }
 
             Vf magnitude = mul(gx, gx);
             magnitude = madd(gy, gy, magnitude);
             magnitude = sqrt(magnitude);
-            stream<Vf>(emaskp + x, magnitude);
+            stream(emaskp + x, magnitude);
         }
         emaskp += emask_pitch;
         dirp += dir_pitch;
@@ -138,7 +138,7 @@ template <typename Vf, typename Vi, bool CALC_DIR>
 static void __stdcall
 sobel(float* blurp, const size_t blur_pitch, float* emaskp,
       const size_t emask_pitch, int32_t* dirp, const size_t dir_pitch,
-      const size_t width, const size_t height)
+      const size_t width, const size_t height) noexcept
 {
     constexpr size_t step = sizeof(Vf) / sizeof(float);
 
@@ -197,13 +197,13 @@ sobel(float* blurp, const size_t blur_pitch, float* emaskp,
                 Vi d3 = castps_si(and_ps(cmpge_ps(tan, t1125), cmplt_ps(tan, t1575)));
                 d3 = srli_i32(d3, 24);
                 d0 = or_si(or_si(d0, d1), or_si(d2, d3));
-                stream<Vi>(dirp + x, d0);
+                stream(dirp + x, d0);
             }
 
             Vf magnitude = mul(gx, gx);
             magnitude = madd(gy, gy, magnitude);
             magnitude = sqrt(magnitude);
-            stream<Vf>(emaskp + x, magnitude);
+            stream(emaskp + x, magnitude);
         }
         emaskp += emask_pitch;
         dirp += dir_pitch;
@@ -218,7 +218,8 @@ template <typename Vf, typename Vi>
 static void __stdcall
 non_max_suppress(const float* emaskp, const size_t em_pitch,
                  const int32_t* dirp, const size_t dir_pitch, float* blurp,
-                 const size_t blur_pitch, const size_t width, const size_t height)
+                 const size_t blur_pitch, const size_t width,
+                 const size_t height) noexcept
 {
     constexpr size_t step = sizeof(Vf) / sizeof(float);
 
@@ -275,7 +276,7 @@ non_max_suppress(const float* emaskp, const size_t em_pitch,
 void __stdcall
 hysteresis(uint8_t* hystp, const size_t hpitch, float* blurp,
            const size_t bpitch, const int width, const int height,
-           const float tmin, const float tmax);
+           const float tmin, const float tmax) noexcept;
 
 #endif
 
diff --git a/avisynth/src/gaussian_blur.h b/avisynth/src/gaussian_blur.h
@@ -32,14 +32,14 @@
 template <typename Vf, arch_t ARCH>
 static void __stdcall
 convert_to_float(const size_t width, const size_t height, const uint8_t* srcp,
-                 const int src_pitch, float* blurp, const size_t blur_pitch)
+                 const int src_pitch, float* blurp, const size_t blur_pitch) noexcept
 {
     constexpr size_t step = sizeof(Vf) / sizeof(float);
 
     for (size_t y = 0; y < height; y++) {
         for (size_t x = 0; x < width; x += step) {
             Vf val = cvtu8_ps<Vf, ARCH>(srcp + x);
-            stream<Vf>(blurp + x, val);
+            stream(blurp + x, val);
         }
         srcp += src_pitch;
         blurp += blur_pitch;
@@ -50,7 +50,7 @@ convert_to_float(const size_t width, const size_t height, const uint8_t* srcp,
 template <typename Vf>
 static void
 horizontal_blur(const float* hkernel, float* buffp, const int radius,
-                const size_t width, float* blurp)
+                const size_t width, float* blurp) noexcept
 {
     constexpr size_t step = sizeof(Vf) / sizeof(float);
     const int length = radius * 2 + 1;
@@ -67,7 +67,7 @@ horizontal_blur(const float* hkernel, float* buffp, const int radius,
             Vf val = loadu<Vf>(buffp + x + i);
             sum = madd(k, val, sum);
         }
-        stream<Vf>(blurp + x, sum);
+        stream(blurp + x, sum);
     }
 }
 
@@ -77,7 +77,7 @@ static void __stdcall
 gaussian_blur(const int radius, const float* kernel, const float* hkernel,
               float* buffp, float* blurp, const size_t blur_pitch,
               const uint8_t* srcp, const size_t src_pitch, const size_t width,
-              const size_t height)
+              const size_t height) noexcept
 {
     if (radius == 0) {
         convert_to_float<Vf, ARCH>(
@@ -106,7 +106,7 @@ gaussian_blur(const int radius, const float* kernel, const float* hkernel,
 
                 sum = madd(k, input, sum);
             }
-            store<Vf>(buffp + x, sum);
+            store(buffp + x, sum);
         }
         horizontal_blur<Vf>(hkernel, buffp, radius, width, blurp);
         blurp += blur_pitch;

diff --git a/avisynth/src/hysteresis.cpp b/avisynth/src/hysteresis.cpp
@@ -41,7 +41,7 @@ struct Pos {
 static __forceinline void
 hystfunc(const int32_t x, const int32_t y, float* edge, uint8_t* hyst,
          const size_t epitch, const size_t hpitch, const float th,
-         std::vector<Pos>& stack)
+         std::vector<Pos>& stack) noexcept
 {
     if (!hyst[x + y * hpitch] && edge[x + y * epitch] > th) {
         edge[x + y * epitch] = FLT_MAX;
@@ -54,10 +54,11 @@ hystfunc(const int32_t x, const int32_t y, float* edge, uint8_t* hyst,
 void __stdcall
 hysteresis(uint8_t* hystp, const size_t hpitch, float* blurp,
            const size_t bpitch, const int width, const int height,
-           const float tmin, const float tmax)
+           const float tmin, const float tmax) noexcept
 {
     memset(hystp, 0, hpitch * height);
     std::vector<Pos> stack;
+    stack.reserve(512);
 
     for (int32_t y = 0; y < height; ++y) {
         for (int32_t x = 0; x < width; ++x) {