diff --git a/CMakeLists.txt b/CMakeLists.txt index 5af0835..f7ed492 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,5 +1,5 @@ cmake_minimum_required (VERSION 3.12.4) -project (cmfrec VERSION 3.5.1) +project (cmfrec VERSION 3.5.2) set(CMAKE_BUILD_TYPE Release) ### Note: this build script allows configuring 4 things manually: @@ -366,11 +366,18 @@ endif() list(APPEND union_list ${LAPACK_LIBRARIES} ${BLAS_LIBRARIES}) list(REMOVE_DUPLICATES union_list) +# https://stackoverflow.com/questions/26657082/detect-x86-architecture-in-cmake-file +if (CMAKE_SYSTEM_PROCESSOR MATCHES "(x86)|(X86)|(amd64)|(AMD64)|(i[3-6]86)") + set (IS_AMD64 TRUE) +else () + set (IS_AMD64 FALSE) +endif () + # See https://github.com/xianyi/OpenBLAS/issues/3237 set(CMAKE_REQUIRED_LINK_OPTIONS ${union_list}) set(CMAKE_REQUIRED_LIBRARIES ${union_list}) CHECK_LIBRARY_EXISTS("" "openblas_get_num_threads" "" HAS_OPENBLAS) -if (HAS_OPENBLAS) +if (HAS_OPENBLAS AND IS_AMD64) message(STATUS "Using OpenBLAS - will replace its SYR function.") add_compile_definitions(AVOID_BLAS_SYR) add_compile_definitions(HAS_OPENBLAS) @@ -380,11 +387,15 @@ else() add_compile_definitions(HAS_MKL) else() CHECK_LIBRARY_EXISTS("" "catlas_saxpby" "" HAS_ATLAS) - if (HAS_ATLAS) + if (HAS_ATLAS AND IS_AMD64) message(STATUS "Using ATLAS - will replace its SYR function.") message(WARNING "Note: ATLAS multi-threading might make this library very slow.") add_compile_definitions(HAS_ATLAS) add_compile_definitions(AVOID_BLAS_SYR) + else() + if (HAS_ATLAS) + message(WARNING "Note: ATLAS multi-threading might make this library very slow.") + endif() endif() endif() endif() diff --git a/DESCRIPTION b/DESCRIPTION index 261cba3..b05dcaa 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Package: cmfrec Type: Package Title: Collective Matrix Factorization for Recommender Systems -Version: 3.5.1-1 +Version: 3.5.2 Authors@R: c( person(given="David", family="Cortes", role=c("aut", "cre", "cph"), email="david.cortes.rivera@gmail.com"), diff 
--git a/setup.py b/setup.py index d618a55..f10ac24 100644 --- a/setup.py +++ b/setup.py @@ -7,7 +7,7 @@ import numpy as np from Cython.Distutils import build_ext import sys, os, subprocess, warnings, re - +import platform found_omp = True def set_omp_false(): @@ -45,6 +45,8 @@ def set_omp_false(): except AttributeError: EXIT_SUCCESS = 0 +is_amd64 = platform.machine().lower() in ("i386", "i486", "i586", "i686", "x86", "amd64", "x86_64") + class build_ext_subclass( build_ext_with_blas ): def build_extensions(self): is_windows = sys.platform[:3].lower() == "win" @@ -348,7 +350,7 @@ def test_supports_clang_reassociate(self): setup( name = "cmfrec", packages = ["cmfrec"], - version = '3.5.1-4', + version = '3.5.2', description = 'Collective matrix factorization', author = 'David Cortes', url = 'https://github.com/david-cortes/cmfrec', @@ -373,7 +375,7 @@ def test_supports_clang_reassociate(self): ("USE_DOUBLE", None), ("NDEBUG", None), ("USE_FINDBLAS" if use_findblas else "NO_FINDBLAS", None), - ("USE_BLAS_SYR" if use_findblas else "AVOID_BLAS_SYR", None)] + ("USE_BLAS_SYR" if (use_findblas or not is_amd64) else "AVOID_BLAS_SYR", None)] ), Extension("cmfrec.wrapper_float", sources=["cmfrec/cfuns_float.pyx" if use_findblas else "cmfrec/cfuns_float_plusblas.pyx", @@ -385,7 +387,7 @@ def test_supports_clang_reassociate(self): ("USE_FLOAT", None), ("NDEBUG", None), ("USE_FINDBLAS" if use_findblas else "NO_FINDBLAS", None), - ("USE_BLAS_SYR" if use_findblas else "AVOID_BLAS_SYR", None)] + ("USE_BLAS_SYR" if (use_findblas or not is_amd64) else "AVOID_BLAS_SYR", None)] ), ] ) diff --git a/src/common.c b/src/common.c index 3d7f3c3..e2d6632 100644 --- a/src/common.c +++ b/src/common.c @@ -623,7 +623,7 @@ real_t fun_grad_cannonical_form *******************************************************************************/ -#ifdef AVOID_BLAS_SYR +#if defined(AVOID_BLAS_SYR) && !(defined(FOR_R) && !defined(__SSE__)) #undef cblas_tsyr #define cblas_tsyr(order, Uplo, N, alpha, X, incX, A, lda) \ custom_syr(N, alpha, X, A, lda)