diff --git a/.dir-locals.el b/.dir-locals.el index 711f4a63df..c0dc5741b7 100644 --- a/.dir-locals.el +++ b/.dir-locals.el @@ -1,11 +1,32 @@ -;; Emacs C mode formatting for the BLIS layout requirements. -((c-mode . ((c-file-style . "stroustrup") - (c-basic-offset . 4) - (comment-start . "// ") - (comment-end . "") - (indent-tabs-mode . t) - (tab-width . 4) - (parens-require-spaces . nil) - (require-final-newline . t) - (eval add-hook `before-save-hook `delete-trailing-whitespace) - ))) +;; Emacs formatting for the BLIS layout requirements. + +( + ;; Recognize *.mk files as Makefile fragments + (auto-mode-alist . (("\\.mk\\'" . makefile-mode)) ) + + ;; Makefiles require tabs and are almost always width 8 + (makefile-mode . ( + (indent-tabs-mode . t) + (tab-width . 8) + ) + ) + + ;; C code formatting roughly according to docs/CodingConventions.md + (c-mode . ( + (c-file-style . "bsd") + (c-basic-offset . 4) + (comment-start . "// ") + (comment-end . "") + (parens-require-spaces . nil) + ) + ) + + ;; Default formatting for all source files not overridden above + (prog-mode . ( + (indent-tabs-mode . nil) + (tab-width . 4) + (require-final-newline . t) + (eval add-hook `before-save-hook `delete-trailing-whitespace) + ) + ) +) diff --git a/.travis.yml b/.travis.yml index f7269b3d2c..0ae55e3387 100644 --- a/.travis.yml +++ b/.travis.yml @@ -62,6 +62,15 @@ matrix: CC=aarch64-linux-gnu-gcc-10 CXX=aarch64-linux-gnu-g++-10 \ PACKAGES="gcc-10-aarch64-linux-gnu g++-10-aarch64-linux-gnu libc6-dev-arm64-cross qemu-system-arm qemu-user" \ TESTSUITE_WRAPPER="qemu-aarch64 -cpu max,sve=true,sve512=true -L /usr/aarch64-linux-gnu/" + # arm64 build and fast testsuite (qemu) + # NOTE: This entry omits the -cpu flag so that while both NEON and SVE kernels + # are compiled, only NEON kernels will be tested. 
(h/t to RuQing Xu) + - os: linux + compiler: aarch64-linux-gnu-gcc-10 + env: OOT=0 TEST=FAST SDE=0 THR="none" CONF="arm64" \ + CC=aarch64-linux-gnu-gcc-10 CXX=aarch64-linux-gnu-g++-10 \ + PACKAGES="gcc-10-aarch64-linux-gnu g++-10-aarch64-linux-gnu libc6-dev-arm64-cross qemu-system-arm qemu-user" \ + TESTSUITE_WRAPPER="qemu-aarch64 -L /usr/aarch64-linux-gnu/" install: - if [ "$CC" = "gcc" ] && [ "$TRAVIS_OS_NAME" = "linux" ]; then export CC="gcc-9"; fi - if [ -n "$PACKAGES" ] && [ "$TRAVIS_OS_NAME" = "linux" ]; then sudo apt-get install -y $PACKAGES; fi diff --git a/CREDITS b/CREDITS index 0a13fca5f9..41d8c89701 100644 --- a/CREDITS +++ b/CREDITS @@ -5,124 +5,128 @@ Acknowledgements The BLIS framework was originally authored by - Field Van Zee @fgvanzee (The University of Texas at Austin) + Field Van Zee @fgvanzee (The University of Texas at Austin) -but many others have contributed code and feedback, including +but many others have contributed code, ideas, and feedback, including - Jay Acosta @jay-acosta (Oracle) - Sameer Agarwal @sandwichmaker (Google) - Murtaza Ali (Texas Instruments) - Sajid Ali @s-sajid-ali (Northwestern University) + Jay Acosta @jay-acosta (Oracle) + Sameer Agarwal @sandwichmaker (Google) + Murtaza Ali (Texas Instruments) + Sajid Ali @s-sajid-ali (Northwestern University) Erling Andersen @erling-d-andersen Alex Arslan @ararslan - Vernon Austel (IBM, T.J. Watson Research Center) - Satish Balay @balay (Argonne National Laboratory) + Vernon Austel (IBM, T.J. 
Watson Research Center) + Mohsen Aznaveh @Aznaveh (Texas A&M University) + Satish Balay @balay (Argonne National Laboratory) Kihiro Bando @bandokihiro - Matthew Brett @matthew-brett (University of Birmingham) + Matthew Brett @matthew-brett (University of Birmingham) Jérémie du Boisberranger @jeremiedbb - Jed Brown @jedbrown (Argonne National Laboratory) + Jed Brown @jedbrown (Argonne National Laboratory) Robin Christ @robinchrist Dilyn Corner @dilyn-corner - Mat Cross @matcross (NAG) + Mat Cross @matcross (NAG) @decandia50 - Daniël de Kok @danieldk (Explosion) - Kay Dewhurst @jkd2016 (Max Planck Institute, Halle, Germany) - Jeff Diamond (Oracle) + Harsh Dave @HarshDave12 (AMD) + Tim Davis @DrTimothyAldenDavis (Texas A&M University) + Daniël de Kok @danieldk (Explosion) + Kay Dewhurst @jkd2016 (Max Planck Institute, Halle, Germany) + Jeff Diamond (Oracle) Johannes Dieterich @iotamudelta Krzysztof Drewniak @krzysz00 - Marat Dukhan @Maratyszcza (Google) - Victor Eijkhout @VictorEijkhout (Texas Advanced Computing Center) - Evgeny Epifanovsky @epifanovsky (Q-Chem) + Marat Dukhan @Maratyszcza (Google) + Victor Eijkhout @VictorEijkhout (Texas Advanced Computing Center) + Evgeny Epifanovsky @epifanovsky (Q-Chem) Isuru Fernando @isuruf Roman Gareev @gareevroman Richard Goldschmidt @SuperFluffy Chris Goodyer Alexander Grund @Flamefire - John Gunnels @jagunnels (IBM, T.J. Watson Research Center) + John Gunnels @jagunnels (IBM, T.J. 
Watson Research Center) Ali Emre Gülcü @Lephar - Jeff Hammond @jeffhammond (Intel) + Jeff Hammond @jeffhammond (Intel) Jacob Gorm Hansen @jacobgorm - Shivaprashanth H (Global Edge) + Shivaprashanth H (Global Edge) Jean-Michel Hautbois @jhautbois Ian Henriksen @insertinterestingnamehere (The University of Texas at Austin) - Greg Henry (Intel) + Greg Henry (Intel) Minh Quan Ho @hominhquan Matthew Honnibal @honnibal Stefan Husmann @stefanhusmann - Francisco Igual @figual (Universidad Complutense de Madrid) + Francisco Igual @figual (Universidad Complutense de Madrid) Madeesh Kannan @shadeMe Tony Kelman @tkelman - Lee Killough @leekillough (Cray) - Mike Kistler @mkistler (IBM, Austin Research Laboratory) - Ivan Korostelev @ivan23kor (University of Alberta) - Kyungmin Lee @kyungminlee (Ohio State University) + Lee Killough @leekillough (Cray) + Mike Kistler @mkistler (IBM, Austin Research Laboratory) + Ivan Korostelev @ivan23kor (University of Alberta) + Kyungmin Lee @kyungminlee (Ohio State University) Michael Lehn @michael-lehn Shmuel Levine @ShmuelLevine @lschork2 Dave Love @loveshack - Tze Meng Low (The University of Texas at Austin) - Ye Luo @ye-luo (Argonne National Laboratory) - Ricardo Magana @magania (Hewlett Packard Enterprise) - Madan mohan Manokar @madanm3 (AMD) + Tze Meng Low (The University of Texas at Austin) + Ye Luo @ye-luo (Argonne National Laboratory) + Ricardo Magana @magania (Hewlett Packard Enterprise) + Madan mohan Manokar @madanm3 (AMD) Giorgos Margaritis - Bryan Marker @bamarker (The University of Texas at Austin) - Simon Lukas Märtens @ACSimon33 (RWTH Aachen University) - Devin Matthews @devinamatthews (The University of Texas at Austin) + Bryan Marker @bamarker (The University of Texas at Austin) + Simon Lukas Märtens @ACSimon33 (RWTH Aachen University) + Devin Matthews @devinamatthews (The University of Texas at Austin) Stefanos Mavros @smavros - Mithun Mohan @MithunMohanKadavil (AMD) + Mithun Mohan @MithunMohanKadavil (AMD) @moon-chilled 
Ilknur Mustafazade @Runkli @nagsingh - Bhaskar Nallani @BhaskarNallani (AMD) - Stepan Nassyr @stepannassyr (Jülich Supercomputing Centre) + Bhaskar Nallani @BhaskarNallani (AMD) + Stepan Nassyr @stepannassyr (Jülich Supercomputing Centre) Nisanth M P @nisanthmp - Nisanth Padinharepatt (AMD) + Nisanth Padinharepatt (AMD) Ajay Panyala @ajaypanyala - Marc-Antoine Parent @maparent (Conversence) - Devangi Parikh @dnparikh (The University of Texas at Austin) - Elmar Peise @elmar-peise (RWTH-Aachen) + Marc-Antoine Parent @maparent (Conversence) + Devangi Parikh @dnparikh (The University of Texas at Austin) + Elmar Peise @elmar-peise (RWTH-Aachen) Clément Pernet @ClementPernet Ilya Polkovnichenko - Jack Poulson @poulson (Stanford) + Jack Poulson @poulson (Stanford) Mathieu Poumeyrol @kali - Christos Psarras @ChrisPsa (RWTH Aachen University) + Christos Psarras @ChrisPsa (RWTH Aachen University) @pkubaj @qnerd Michael Rader @mrader1248 - Pradeep Rao @pradeeptrgit (AMD) + Pradeep Rao @pradeeptrgit (AMD) Aleksei Rechinskii - Leick Robinson @LeickR (Oracle) + Leick Robinson @LeickR (Oracle) Karl Rupp @karlrupp - Paul Sandoz @PaulSandoz (Oracle) - Martin Schatz (The University of Texas at Austin) + Paul Sandoz @PaulSandoz (Oracle) + Martin Schatz (The University of Texas at Austin) Nico Schlömer @nschloe Rene Sitt - Tony Skjellum @tonyskjellum (The University of Tennessee at Chattanooga) - Mikhail Smelyanskiy (Intel, Parallel Computing Lab) + Tony Skjellum @tonyskjellum (The University of Tennessee at Chattanooga) + Mikhail Smelyanskiy (Intel, Parallel Computing Lab) Nathaniel Smith @njsmith Shaden Smith @ShadenSmith - Tyler Smith @tlrmchlsmth (The University of Texas at Austin) + Tyler Smith @tlrmchlsmth (The University of Texas at Austin) Snehith @ArcadioN09 - Paul Springer @springer13 (RWTH Aachen University) - Adam J. Stewart @adamjstewart (University of Illinois at Urbana-Champaign) + Paul Springer @springer13 (RWTH Aachen University) + Adam J. 
Stewart @adamjstewart (University of Illinois at Urbana-Champaign) Vladimir Sukarev + Harihara Sudhan S @ihariharasudhan (AMD) Chengguo Sun @chengguosun - Santanu Thangaraj (AMD) - Nicholai Tukanov @nicholaiTukanov (The University of Texas at Austin) - Rhys Ulerich @RhysU (The University of Texas at Austin) - Robert van de Geijn @rvdg (The University of Texas at Austin) - Meghana Vankadari @Meghana-vankadari (AMD) - Kiran Varaganti @kvaragan (AMD) - Natalia Vassilieva (Hewlett Packard Enterprise) + Santanu Thangaraj (AMD) + Nicholai Tukanov @nicholaiTukanov (The University of Texas at Austin) + Rhys Ulerich @RhysU (The University of Texas at Austin) + Robert van de Geijn @rvdg (The University of Texas at Austin) + Meghana Vankadari @Meghana-vankadari (AMD) + Kiran Varaganti @kvaragan (AMD) + Natalia Vassilieva (Hewlett Packard Enterprise) @h-vetinari - Andrew Wildman @awild82 (University of Washington) - Zhang Xianyi @xianyi (Chinese Academy of Sciences) + Andrew Wildman @awild82 (University of Washington) + Zhang Xianyi @xianyi (Chinese Academy of Sciences) Benda Xu @heroxbd - Guodong Xu @docularxu (Linaro.org) - RuQing Xu @xrq-phys (The University of Tokyo) + Guodong Xu @docularxu (Linaro.org) + RuQing Xu @xrq-phys (The University of Tokyo) Costas Yamin @cosstas - Chenhan Yu @ChenhanYu (The University of Texas at Austin) - Roman Yurchak @rth (Symerio) + Chenhan Yu @ChenhanYu (The University of Texas at Austin) + Roman Yurchak @rth (Symerio) Stefano Zampini @stefanozampini M. 
Zhou @cdluminate diff --git a/build/bli_config.h.in b/build/bli_config.h.in index 7dc67059f8..9636278d9c 100644 --- a/build/bli_config.h.in +++ b/build/bli_config.h.in @@ -45,6 +45,8 @@ // Enabled kernel sets (kernel_list) @kernel_list_defines@ +#define BLIS_VERSION_STRING "@version@" + #if @enable_system@ #define BLIS_ENABLE_SYSTEM #else diff --git a/common.mk b/common.mk index 68b075c82b..23bcb34971 100644 --- a/common.mk +++ b/common.mk @@ -101,7 +101,7 @@ get-noopt-cflags-for = $(strip $(CFLAGS_PRESET) \ $(call load-var-for,CLANGFLAGS,$(1)) \ $(call load-var-for,CPPROCFLAGS,$(1)) \ $(CTHREADFLAGS) \ - $(CINCFLAGS) $(VERS_DEF) \ + $(CINCFLAGS) \ ) get-noopt-cxxflags-for = $(strip $(CFLAGS_PRESET) \ @@ -113,7 +113,7 @@ get-noopt-cxxflags-for = $(strip $(CFLAGS_PRESET) \ $(call load-var-for,CPPROCFLAGS,$(1)) \ $(CTHREADFLAGS) \ $(CXXTHREADFLAGS) \ - $(CINCFLAGS) $(VERS_DEF) \ + $(CINCFLAGS) \ ) get-refinit-cflags-for = $(strip $(call load-var-for,COPTFLAGS,$(1)) \ @@ -534,6 +534,7 @@ GREP := grep EGREP := grep -E XARGS := xargs INSTALL := install -c +DEVNULL := /dev/null # Script for creating a monolithic header file. #FLATTEN_H := $(DIST_PATH)/build/flatten-headers.sh @@ -1193,7 +1194,18 @@ CBLAS_H_FLAT := $(BASE_INC_PATH)/$(CBLAS_H) # files will be needed when compiling bli_cntx_ref.c with the monolithic header. ifeq ($(strip $(SHARE_PATH)),.) REF_KER_SRC := $(DIST_PATH)/$(REFKERN_DIR)/bli_cntx_ref.c -REF_KER_HEADERS := $(shell $(GREP) "\#include" $(REF_KER_SRC) | sed -e "s/\#include [\"<]\([a-zA-Z0-9\_\.\/\-]*\)[\">].*/\1/g" | $(GREP) -v $(BLIS_H)) +# +# NOTE: A redirect to /dev/null has been added to the grep command below because +# as of version 3.8, grep outputs warnings when encountering stray backslashes +# in regular expressions [1]. Versions older than 3.8 not only do not complain, +# but actually seem to *require* the backslash, perhaps because of the way we +# are invoking grep via GNU make's shell command. 
WHEN DEBUGGING ANYTHING +# INVOLVING THE MAKE VARIABLE BELOW, PLEASE CONSIDER TEMPORARILY REMOVING THE +# REDIRECT TO /dev/null SO THAT YOU SEE ANY MESSAGES SENT TO STANDARD ERROR. +# +# [1] https://lists.gnu.org/archive/html/info-gnu/2022-09/msg00001.html +# +REF_KER_HEADERS := $(shell $(GREP) "\#include" $(REF_KER_SRC) 2> $(DEVNULL) | sed -e "s/\#include [\"<]\([a-zA-Z0-9\_\.\/\-]*\)[\">].*/\1/g" | $(GREP) -v $(BLIS_H)) endif # Match each header found above with the path to that header, and then strip @@ -1244,10 +1256,6 @@ BLIS_CONFIG_H := ./bli_config.h # --- Special preprocessor macro definitions ----------------------------------- # -# Define a C preprocessor macro to communicate the current version so that it -# can be embedded into the library and queried later. -VERS_DEF := -DBLIS_VERSION_STRING=\"$(VERSION)\" - # Define a C preprocessor flag that is *only* defined when BLIS is being # compiled. (In other words, an application that #includes blis.h will not # get this cpp macro.) diff --git a/config/arm64/bli_family_arm64.h b/config/arm64/bli_family_arm64.h index b242d70492..3fb08fc422 100644 --- a/config/arm64/bli_family_arm64.h +++ b/config/arm64/bli_family_arm64.h @@ -39,6 +39,8 @@ // -- MEMORY ALLOCATION -------------------------------------------------------- #define BLIS_SIMD_ALIGN_SIZE 16 + +#define BLIS_SIMD_MAX_SIZE 128 // Note: The default is 64. #define BLIS_SIMD_MAX_NUM_REGISTERS 32 // SVE-specific configs. diff --git a/config/old/newarch/make_defs.mk b/config/old/newarch/make_defs.mk index 523e0b13bc..59393c56fa 100644 --- a/config/old/newarch/make_defs.mk +++ b/config/old/newarch/make_defs.mk @@ -1,6 +1,6 @@ -#!/bin/bash # -# BLIS +# +# BLIS # An object-based framework for developing high-performance BLAS-like # libraries. # @@ -47,7 +47,7 @@ CC := gcc CC_VENDOR := gcc endif -# Enable IEEE Standard 1003.1-2004 (POSIX.1d). +# Enable IEEE Standard 1003.1-2004 (POSIX.1d). # NOTE: This is needed to enable posix_memalign(). 
CPPROCFLAGS := -D_POSIX_C_SOURCE=200112L CMISCFLAGS := -std=c99 @@ -67,13 +67,13 @@ endif CKOPTFLAGS := $(COPTFLAGS) ifeq ($(CC_VENDOR),gcc) -CKVECFLAGS := +CKVECFLAGS := else ifeq ($(CC_VENDOR),icc) -CKVECFLAGS := +CKVECFLAGS := else ifeq ($(CC_VENDOR),clang) -CKVECFLAGS := +CKVECFLAGS := else $(error gcc, icc, or clang is required for this configuration.) endif @@ -83,4 +83,3 @@ endif # Store all of the variables here to new variables containing the # configuration name. $(eval $(call store-make-defs,$(THIS_CONFIG))) - diff --git a/config/power/bli_family_power.h b/config/power/bli_family_power.h new file mode 100644 index 0000000000..21b44db870 --- /dev/null +++ b/config/power/bli_family_power.h @@ -0,0 +1,41 @@ +/* + + BLIS + An object-based framework for developing high-performance BLAS-like + libraries. + + Copyright (C) 2014, The University of Texas at Austin + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + - Neither the name(s) of the copyright holder(s) nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +*/ + +//#ifndef BLIS_FAMILY_H +//#define BLIS_FAMILY_H + + + +//#endif + diff --git a/config/power/make_defs.mk b/config/power/make_defs.mk new file mode 100644 index 0000000000..2a366f1e2c --- /dev/null +++ b/config/power/make_defs.mk @@ -0,0 +1,82 @@ +# +# +# BLIS +# An object-based framework for developing high-performance BLAS-like +# libraries. +# +# Copyright (C) 2014, The University of Texas at Austin +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: +# - Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# - Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# - Neither the name(s) of the copyright holder(s) nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +# HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# +# + + +# Declare the name of the current configuration and add it to the +# running list of configurations included by common.mk. +THIS_CONFIG := power +#CONFIGS_INCL += $(THIS_CONFIG) + +# +# --- Determine the C compiler and related flags --- +# + +# NOTE: The build system will append these variables with various +# general-purpose/configuration-agnostic flags in common.mk. You +# may specify additional flags here as needed. +CPPROCFLAGS := +CMISCFLAGS := +CPICFLAGS := +CWARNFLAGS := + +ifneq ($(DEBUG_TYPE),off) +CDBGFLAGS := -g +endif + +ifeq ($(DEBUG_TYPE),noopt) +COPTFLAGS := -O0 +else +COPTFLAGS := -O2 +endif + +# Flags specific to optimized kernels. +CKOPTFLAGS := $(COPTFLAGS) -O3 +CKVECFLAGS := + +# Flags specific to reference kernels. +CROPTFLAGS := $(CKOPTFLAGS) +ifeq ($(CC_VENDOR),gcc) +CRVECFLAGS := $(CKVECFLAGS) -funsafe-math-optimizations -ffp-contract=fast +else +ifeq ($(CC_VENDOR),clang) +CRVECFLAGS := $(CKVECFLAGS) -funsafe-math-optimizations -ffp-contract=fast +else +CRVECFLAGS := $(CKVECFLAGS) +endif +endif + +# Store all of the variables here to new variables containing the +# configuration name. 
+$(eval $(call store-make-defs,$(THIS_CONFIG))) + diff --git a/config_registry b/config_registry index 7753db71fa..e80f27b752 100644 --- a/config_registry +++ b/config_registry @@ -14,6 +14,7 @@ amd64_legacy: excavator steamroller piledriver bulldozer generic amd64: zen3 zen2 zen generic arm64: armsve firestorm thunderx2 cortexa57 cortexa53 generic arm32: cortexa15 cortexa9 generic +power: power10 power9 generic # Intel architectures. skx: skx/skx/haswell/zen diff --git a/configure b/configure index 06201b4fa9..d45b0ba9d1 100755 --- a/configure +++ b/configure @@ -46,396 +46,396 @@ print_usage() fi # Echo usage info. - echo " " - echo " ${script_name} (BLIS ${version})" - #echo " " - #echo " BLIS ${version}" - echo " " - echo " Configure BLIS's build system for compilation using a specified" - echo " configuration directory." - echo " " - echo " Usage:" - echo " " - echo " ${script_name} [options] [env. vars.] confname" - echo " " - echo " Arguments:" - echo " " - echo " confname The name of the sub-directory inside of the 'config'" - echo " directory containing the desired BLIS configuration." - echo " Note that confname MUST be specified; if it is not," - echo " configure will complain. To build a completely generic" - echo " implementation, use the 'generic' configuration" - echo " " - echo " Options:" - echo " " - echo " -p PREFIX, --prefix=PREFIX" - echo " " - echo " The common installation prefix for all files. If given," - echo " this option effectively implies:" - echo " --libdir=EXECPREFIX/lib" - echo " --includedir=PREFIX/include" - echo " --sharedir=PREFIX/share" - echo " where EXECPREFIX defaults to PREFIX. If this option is" - echo " not given, PREFIX defaults to '${prefix_def}'. If PREFIX" - echo " refers to a directory that does not exist, it will be" - echo " created." - echo " " - echo " --exec-prefix=EXECPREFIX" - echo " " - echo " The installation prefix for libraries. 
Specifically, if" - echo " given, this option effectively implies:" - echo " --libdir=EXECPREFIX/lib" - echo " If not given, EXECPREFIX defaults to PREFIX, which may be" - echo " modified by the --prefix option. If EXECPREFIX refers to" - echo " a directory that does not exist, it will be created." - echo " " - echo " --libdir=LIBDIR" - echo " " - echo " The path to which make will install libraries. If not" - echo " given, LIBDIR defaults to PREFIX/lib. If LIBDIR refers to" - echo " a directory that does not exist, it will be created." - echo " " - echo " --includedir=INCDIR" - echo " " - echo " The path to which make will install development header" - echo " files. If not given, INCDIR defaults to PREFIX/include." - echo " If INCDIR refers to a directory that does not exist, it" - echo " will be created." - echo " " - echo " --sharedir=SHAREDIR" - echo " " - echo " The path to which make will makefile fragments containing" - echo " make variables determined by configure (e.g. CC, CFLAGS," - echo " and LDFLAGS). These files allow certain BLIS makefiles," - echo " such as those in the examples or testsuite directories, to" - echo " operate on an installed copy of BLIS rather than a local" - echo " (and possibly uninstalled) copy. If not given, SHAREDIR" - echo " defaults to PREFIX/share. If SHAREDIR refers to a" - echo " directory that does not exist, it will be created." - echo " " - echo " --enable-verbose-make, --disable-verbose-make" - echo " " - echo " Enable (disabled by default) verbose compilation output" - echo " during make." - echo " " - echo " --enable-arg-max-hack --disable-arg-max-hack" - echo " " - echo " Enable (disabled by default) build system logic that" - echo " will allow archiving/linking the static/shared library" - echo " even if the command plus command line arguments exceeds" - echo " the operating system limit (ARG_MAX)." - echo " " - echo " -d DEBUG, --enable-debug[=DEBUG]" - echo " " - echo " Enable debugging symbols in the library. 
If argument" - echo " DEBUG is given as 'opt', then optimization flags are" - echo " kept in the framework, otherwise optimization is" - echo " turned off." - echo " " - echo " --disable-static, --enable-static" - echo " " - echo " Disable (enabled by default) building BLIS as a static" - echo " library. If the static library build is disabled, the" - echo " shared library build must remain enabled." - echo " " - echo " --disable-shared, --enable-shared" - echo " " - echo " Disable (enabled by default) building BLIS as a shared" - echo " library. If the shared library build is disabled, the" - echo " static library build must remain enabled." - echo " " - echo " --enable-rpath, --disable-rpath" - echo " " - echo " Enable (disabled by default) setting an install_name for" - echo " dynamic libraries on macOS which starts with @rpath rather" - echo " than the absolute install path." - echo " " - echo " -e SYMBOLS, --export-shared[=SYMBOLS]" - echo " " - echo " Specify the subset of library symbols that are exported" - echo " within a shared library. Valid values for SYMBOLS are:" - echo " 'public' (the default) and 'all'. By default, only" - echo " functions and variables that belong to public APIs are" - echo " exported in shared libraries. However, the user may" - echo " instead export all symbols in BLIS, even those that were" - echo " intended for internal use only. Note that the public APIs" - echo " encompass all functions that almost any user would ever" - echo " want to call, including the BLAS/CBLAS compatibility APIs" - echo " as well as the basic and expert interfaces to the typed" - echo " and object APIs that are unique to BLIS. Also note that" - echo " changing this option to 'all' will have no effect in some" - echo " environments, such as when compiling with clang on" - echo " Windows." 
- echo " " - echo " -t MODEL, --enable-threading[=MODEL], --disable-threading" - echo " " - echo " Enable threading in the library, using threading model(s)" - echo " MODEL={single,openmp,pthreads,hpx,auto}. If multiple values" - echo " are specified within MODEL, they will all be compiled into" - echo " BLIS, and the choice of which to use will be determined at" - echo " runtime. If the user does not express a preference (by" - echo " setting the BLIS_THREAD_IMPL environment variable to" - echo " 'single', 'openmp', 'pthreads', or 'hpx'; by calling the" - echo " global runtime API bli_thread_set_thread_impl(); or by" - echo " encoding a choice on a per-call basis within a rntm_t" - echo " passed into the expert API), then the first model listed" - echo " in MODEL will be used by default. Note that 'single' is" - echo " silently appended to whatever the user specifies in MODEL," - echo " meaning that single-threaded functionality will always be" - echo " available, even if it is not requested and even if it is" - echo " not enabled by default. Even --disable-threading is" - echo " actually shorthand for --enable-threading=single (which is" - echo " the default when the option is not specified)." - echo " " - echo " --enable-system, --disable-system" - echo " " - echo " Enable conventional operating system support, such as" - echo " pthreads for thread-safety. The default state is enabled." - echo " However, in rare circumstances you may wish to configure" - echo " BLIS for use with a minimal or nonexistent operating" - echo " system (e.g. hardware simulators). In these situations," - echo " --disable-system may be used to jettison all compile-time" - echo " and link-time dependencies outside of the standard C" - echo " library. When disabled, this option also forces the use" - echo " of --disable-threading." 
- echo " " - echo " --disable-pba-pools, --enable-pba-pools" - echo " --disable-sba-pools, --enable-sba-pools" - echo " " - echo " Disable (enabled by default) use of internal memory pools" - echo " within the packing block allocator (pba) and/or the small" - echo " block allocator (sba). The former is used to allocate" - echo " memory used to pack submatrices while the latter is used" - echo " to allocate control/thread tree nodes and thread" - echo " communicators. Both allocations take place in the context" - echo " of level-3 operations. When the pba is disabled, the" - echo " malloc()-like function specified by BLIS_MALLOC_POOL is" - echo " called on-demand whenever a packing block is needed, and" - echo " when the sba is disabled, the malloc()-like function" - echo " specified by BLIS_MALLOC_INTL is called whenever a small" - echo " block is needed, with the two allocators calling free()-" - echo " like functions BLIS_FREE_POOL and BLIS_FREE_INTL," - echo " respectively when blocks are released. When enabled," - echo " either or both pools are populated via the same functions" - echo " mentioned previously, and henceforth blocks are checked" - echo " out and in. The library quickly reaches a state in which" - echo " it no longer needs to call malloc() or free(), even" - echo " across many separate level-3 operation invocations." - echo " " - echo " --enable-mem-tracing, --disable-mem-tracing" - echo " " - echo " Enable (disabled by default) output to stdout that traces" - echo " the allocation and freeing of memory, including the names" - echo " of the functions that triggered the allocation/freeing." - echo " Enabling this option WILL NEGATIVELY IMPACT PERFORMANCE." - echo " Please use only for informational/debugging purposes." - echo " " - echo " --enable-asan, --disable-asan" - echo " " - echo " Enable (disabled by default) compiling and linking BLIS" - echo " framework code with the AddressSanitizer (ASan) library." 
- echo " Optimized kernels are NOT compiled with ASan support due" - echo " to limitations of register assignment in inline assembly." - echo " WARNING: ENABLING THIS OPTION WILL NEGATIVELY IMPACT" - echo " PERFORMANCE. Please use only for informational/debugging" - echo " purposes." - echo " " - echo " -i SIZE, --int-size=SIZE" - echo " " - echo " Set the size (in bits) of internal BLIS integers and" - echo " integer types used in native BLIS interfaces. The" - echo " default integer type size is architecture dependent." - echo " (Hint: You can always find this value printed at the" - echo " beginning of the testsuite output.)" - echo " " - echo " -b SIZE, --blas-int-size=SIZE" - echo " " - echo " Set the size (in bits) of integer types in external" - echo " BLAS and CBLAS interfaces, if enabled. The default" - echo " integer type size used in BLAS/CBLAS is 32 bits." - echo " " - echo " --disable-blas, --enable-blas" - echo " " - echo " Disable (enabled by default) building the BLAS" - echo " compatibility layer." - echo " " - echo " --enable-cblas, --disable-cblas" - echo " " - echo " Enable (disabled by default) building the CBLAS" - echo " compatibility layer. This automatically enables the" - echo " BLAS compatibility layer as well." - echo " " - echo " --disable-mixed-dt, --enable-mixed-dt" - echo " " - echo " Disable (enabled by default) support for mixing the" - echo " storage domain and/or storage precision of matrix" - echo " operands for the gemm operation, as well as support" - echo " for computing in a precision different from one or" - echo " both of matrices A and B." 
- echo " " - echo " --disable-mixed-dt-extra-mem, --enable-mixed-dt-extra-mem" - echo " " - echo " Disable (enabled by default) support for additional" - echo " mixed datatype optimizations that require temporarily" - echo " allocating extra memory--specifically, a single m x n" - echo " matrix (per application thread) whose storage datatype" - echo " is equal to the computation datatype. This option may" - echo " only be enabled when mixed domain/precision support is" - echo " enabled." - echo " " - echo " --disable-sup-handling, --enable-sup-handling" - echo " " - echo " Disable (enabled by default) handling of small/skinny" - echo " matrix problems via separate code branches. When disabled," - echo " these small/skinny level-3 operations will be performed by" - echo " the conventional implementation, which is optimized for" - echo " medium and large problems. Note that what qualifies as" - echo " \"small\" depends on thresholds that may vary by sub-" - echo " configuration." - echo " " - echo " --enable-amd-frame-tweaks, --disable-amd-frame-tweaks" - echo " " - echo " Enable building with certain framework files that have" - echo " been customized by AMD for Zen-based microarchitectures." - echo " The default counterparts of these files must be portable," - echo " and so these customized files may provide some (typically" - echo " modest) performance improvement for some select operations" - echo " and/or APIs, though there may a few (tiny dimension) cases" - echo " where the improvement is more pronounced. Note that the" - echo " target configuration must be Zen-based (or 'amd64') for" - echo " this option to have any effect. (Also note that this" - echo " option is NOT to be confused with enabling AMD *kernels*," - echo " which are determined by the BLIS subconfiguration used at" - echo " runtime.) By default, these customized files are disabled." - echo " " - echo " -a NAME --enable-addon=NAME" - echo " " - echo " Enable the code provided by an addon. 
An addon consists" - echo " of a separate directory of code that provides additional" - echo " APIs, implementations, and/or operations that would" - echo " otherwise not be present within a build of BLIS. This" - echo " option may be used multiple times to specify the inclusion" - echo " of multiple addons. By default, no addons are enabled." - echo " " - echo " -s NAME --enable-sandbox=NAME" - echo " " - echo " Enable a separate sandbox implementation of gemm. This" - echo " option disables BLIS's conventional gemm implementation" - echo " (which shares common infrastructure with other level-3" - echo " operations) and instead compiles and uses the code in" - echo " the NAME directory, which is expected to be a sub-" - echo " directory of 'sandbox'. By default, no sandboxes are" - echo " enabled." - echo " " - echo " --with-memkind, --without-memkind" - echo " " - echo " Forcibly enable or disable the use of libmemkind's" - echo " hbw_malloc() and hbw_free() as substitutes for malloc()" - echo " and free(), respectively, when allocating memory for" - echo " BLIS's memory pools, which are used to manage buffers" - echo " into which matrices are packed. The default behavior" - echo " for this option is environment-dependent; if configure" - echo " detects the presence of libmemkind, libmemkind is used" - echo " by default, and otherwise it is not used by default." - echo " " - echo " -r METHOD, --thread-part-jrir=METHOD" - echo " " - echo " Select a strategy for partitioning computation in JR and" - echo " IR loops and assigning that computation to threads. Valid" - echo " values for METHOD are 'rr', 'slab', and 'tlb':" - echo " 'rr': Assign the computation associated with whole" - echo " columns of microtiles to threads in a round-" - echo " robin fashion. When selected, round-robin" - echo " assignment is also employed during packing." 
- echo " 'slab': Partition the computation into N contiguous" - echo " regions, where each region contains a whole" - echo " number of microtile columns, and assign one" - echo " region to each thread. For some operations, the" - echo " number of microtile columns contained within a" - echo " given region may differ from that of other" - echo " regions, depending on how much work is implied" - echo " by each region. When selected, slab assignment" - echo " is also employed during packing." - echo " 'tlb': Tile-level load balancing is similar to slab," - echo " except that regions will be divided at a more" - echo " granular level (individual microtiles instead" - echo " of whole columns of microtiles) to ensure more" - echo " equitable assignment of work to threads. When" - echo " selected, tlb will only be employed for level-3" - echo " operations except trsm; due to practical and" - echo " algorithmic limitations, slab partitioning will" - echo " be used instead during packing and for trsm." - echo " The default strategy is 'slab'. NOTE: Specifying this" - echo " option constitutes a request, which may be ignored in" - echo " select situations if implementation has a good reason to" - echo " do so. (See description of 'tlb' above for an example of" - echo " this.)" - echo " " - echo " --disable-trsm-preinversion, --enable-trsm-preinversion" - echo " " - echo " Disable (enabled by default) pre-inversion of triangular" - echo " matrix diagonals when performing trsm. When pre-inversion" - echo " is enabled, diagonal elements are inverted outside of the" - echo " microkernel (e.g. during packing) so that the microkernel" - echo " can use multiply instructions. When disabled, division" - echo " instructions are used within the microkernel. 
Executing" - echo " these division instructions within the microkernel will" - echo " incur a performance penalty, but numerical robustness will" - echo " improve for certain cases involving denormal numbers that" - echo " would otherwise result in overflow in the pre-inverted" - echo " values." - echo " " - echo " --force-version=STRING" - echo " " - echo " Force configure to use an arbitrary version string" - echo " STRING. This option may be useful when repackaging" - echo " custom versions of BLIS by outside organizations." - echo " " - echo " -c, --show-config-lists" - echo " " - echo " Print the config and kernel lists, and kernel-to-config" - echo " map after they are read from file. This can be useful" - echo " when debugging certain configuration issues, and/or as" - echo " a sanity check to make sure these lists are constituted" - echo " as expected." - echo " " - echo " --complex-return=gnu|intel" - echo " " - echo " Specify the way in which complex numbers are returned" - echo " from Fortran functions, either \"gnu\" (return in" - echo " registers) or \"intel\" (return via hidden argument)." - echo " If not specified and the environment variable FC is set," - echo " attempt to determine the return type from the compiler." - echo " Otherwise, the default is \"gnu\"." - echo " " - echo " -q, --quiet Suppress informational output. By default, configure" - echo " is verbose. (NOTE: -q is not yet implemented)" - echo " " - echo " -h, --help Output this information and quit." - echo " " - echo " Environment Variables:" - echo " " - echo " CC Specifies the C compiler to use." - echo " CXX Specifies the C++ compiler to use (sandbox only)." - echo " FC Specifies the Fortran compiler to use (only to determine --complex-return)." - echo " AR Specifies the static library archiver to use." - echo " RANLIB Specifies the ranlib (library indexer) executable to use." - echo " PYTHON Specifies the python interpreter to use." 
- echo " CFLAGS Specifies additional compiler flags to use (prepended)." - echo " LDFLAGS Specifies additional linker flags to use (prepended)." - echo " LIBPTHREAD Pthreads library to use." - echo " " - echo " Environment variables are traditionally set prior to running configure:" - echo " " - echo " CC=gcc ./configure [options] haswell" - echo " " - echo " However, they may also be specified as command line options, e.g.:" - echo " " - echo " ./configure [options] CC=gcc haswell" - echo " " - echo " Note that not all compilers are compatible with a given" - echo " configuration." - echo " " + cat <