diff --git a/include/geom/stored_range.h b/include/geom/stored_range.h index 387bb1757a2..6aadbdf7344 100644 --- a/include/geom/stored_range.h +++ b/include/geom/stored_range.h @@ -113,6 +113,39 @@ class StoredRange // specifically, do *not* copy the vector } + /** + * NOTE: When using pthreads this constructor is MANDATORY!!! + * + * Copy constructor. The \p StoredRange can be copied into + * subranges for parallel execution. In this way the + * initial \p StoredRange can be thought of as the root of + * a binary tree. The root element is the only element + * which interacts with the user. It takes a specified + * range of objects and packs it into a contiguous vector + * which can be split efficiently. However, there is no need + * for the child ranges to contain this vector, so long as + * the parent outlives the children. So we implement + * the copy constructor to specifically omit the \p _objs + * vector. This version allows you to set the beginning and + * ending of this new range to be different from that of the + * one we're copying. + */ + StoredRange (const StoredRange &er, + const const_iterator &begin, + const const_iterator &end): + _end(end), + _begin(begin), + _last(0), // Initialize these in a moment + _first(0), + _grainsize(er._grainsize), + _objs() + { + // specifically, do *not* copy the vector + + _first = std::distance(er._begin, _begin); + _last = _first + std::distance(_begin, _end); + } + /** * Splits the range \p r. 
The first half * of the range is left in place, the second diff --git a/include/parallel/threads.h b/include/parallel/threads.h index 73d7e36c7b5..8637b64b33a 100644 --- a/include/parallel/threads.h +++ b/include/parallel/threads.h @@ -44,6 +44,13 @@ # include "tbb/tbb_thread.h" #endif +#ifdef LIBMESH_HAVE_PTHREAD +#include +#include +#include +#include +#include +#endif // Thread-Local-Storage macros @@ -310,9 +317,349 @@ namespace Threads template class atomic : public tbb::atomic {}; +#else //LIBMESH_HAVE_TBB_API +#ifdef LIBMESH_HAVE_PTHREAD + template + unsigned int num_pthreads(Range & range) + { + return std::min((unsigned long)libMesh::n_threads(), range.size()); + } + template + class RangeBody + { + public: + Range * range; + Body * body; + }; -#else //LIBMESH_HAVE_TBB_API + template + void * run_body(void * args) + { + + RangeBody * range_body = (RangeBody*)args; + + Body & body = *range_body->body; + Range & range = *range_body->range; + + body(range); + + return NULL; + } + + //------------------------------------------------------------------- + /** + * Scheduler to manage threads. + */ + class task_scheduler_init + { + public: + static const int automatic = -1; + explicit task_scheduler_init (int = automatic) {} + void initialize (int = automatic) {} + void terminate () {} + }; + + //------------------------------------------------------------------- + /** + * Dummy "splitting object" used to distinguish splitting constructors + * from copy constructors. + */ + class split {}; + + //------------------------------------------------------------------- + /** + * Exectue the provided function object in parallel on the specified + * range. 
+ */ + template + inline + void parallel_for (const Range &range, const Body &body) + { + Threads::BoolAcquire b(Threads::in_threads); + + unsigned int n_threads = num_pthreads(range); + + std::vector ranges(n_threads); + std::vector > range_bodies(n_threads); + std::vector threads(n_threads); + + // Create the ranges for each thread + unsigned int range_size = range.size() / n_threads; + + typename Range::const_iterator current_beginning = range.begin(); + + for(unsigned int i=0; i, (void*)&range_bodies[i]); + + // Wait for them to finish + for(unsigned int i=0; i + inline + void parallel_for (const Range &range, const Body &body, const Partitioner &) + { + parallel_for(range, body); + } + + //------------------------------------------------------------------- + /** + * Exectue the provided reduction operation in parallel on the specified + * range. + */ + template + inline + void parallel_reduce (const Range &range, Body &body) + { + Threads::BoolAcquire b(Threads::in_threads); + + unsigned int n_threads = num_pthreads(range); + + std::vector ranges(n_threads); + std::vector bodies(n_threads); + std::vector > range_bodies(n_threads); + + // Create copies of the body for each thread + bodies[0] = &body; // Use the original body for the first one + for(unsigned int i=1; i threads(n_threads); + for(unsigned int i=0; i, (void*)&range_bodies[i]); + + // Wait for them to finish + for(unsigned int i=0; ijoin(*bodies[i]); + + // Clean up + for(unsigned int i=1; i + inline + void parallel_reduce (const Range &range, Body &body, const Partitioner &) + { + parallel_reduce(range, body); + } + + //------------------------------------------------------------------- + /** + * Spin mutex. Implements mutual exclusion by busy-waiting in user + * space for the lock to be acquired. + */ + class spin_mutex + { + public: + // Might want to use PTHREAD_MUTEX_ADAPTIVE_NP on Linux, but it's not available on OSX. 
//-------------------------------------------------------------------
/**
 * Mutex offering the \p spin_mutex interface on top of a pthread
 * mutex created with default attributes.  \p lock() simply calls
 * \p pthread_mutex_lock(), so how a contended lock waits is decided
 * by the pthread implementation; no explicit spinning is done here.
 */
class spin_mutex
{
public:
  // Might want to use PTHREAD_MUTEX_ADAPTIVE_NP on Linux, but it's not available on OSX.
  spin_mutex() { pthread_mutex_init(&mutex, NULL); }
  ~spin_mutex() { pthread_mutex_destroy(&mutex); }

  /** Acquires the mutex via \p pthread_mutex_lock(). */
  void lock () { pthread_mutex_lock(&mutex); }

  /** Releases the mutex via \p pthread_mutex_unlock(). */
  void unlock () { pthread_mutex_unlock(&mutex); }

  /**
   * Scope guard: locks a \p spin_mutex on construction or
   * \p acquire() and unlocks it on \p release() or destruction.
   */
  class scoped_lock
  {
  public:
    /** Creates a guard that holds nothing yet. */
    scoped_lock () : smutex(NULL) {}

    /** Creates a guard and immediately locks \p in_smutex. */
    explicit scoped_lock ( spin_mutex& in_smutex ) : smutex(&in_smutex) { smutex->lock(); }

    ~scoped_lock () { release(); }

    /**
     * Locks \p in_smutex and remembers it.  A mutex this guard
     * already holds is *not* unlocked first; call \p release()
     * before re-using a guard.
     */
    void acquire ( spin_mutex& in_smutex ) { smutex = &in_smutex; smutex->lock(); }

    /** Unlocks the held mutex, if any; harmless when nothing is held. */
    void release () { if(smutex) smutex->unlock(); smutex = NULL; }

  private:
    spin_mutex * smutex; // NULL while nothing is held
  };

private:
  pthread_mutex_t mutex;
};

//-------------------------------------------------------------------
/**
 * Recursive mutex: a pthread mutex of type
 * \p PTHREAD_MUTEX_RECURSIVE, so the thread that owns it may lock
 * it again without deadlocking as long as every \p lock() is
 * matched by an \p unlock().
 */
class recursive_mutex
{
public:
  recursive_mutex()
  {
    // The attributes object is only needed while the mutex is being
    // initialised.  Destroying it straight afterwards leaves the
    // mutex untouched and releases whatever the attribute object
    // itself owns, which previously was never released.
    pthread_mutexattr_t attr;
    pthread_mutexattr_init(&attr);
    pthread_mutexattr_settype(&attr, PTHREAD_MUTEX_RECURSIVE);
    pthread_mutex_init(&mutex, &attr);
    pthread_mutexattr_destroy(&attr);
  }
  ~recursive_mutex() { pthread_mutex_destroy(&mutex); }

  /** Acquires the mutex via \p pthread_mutex_lock(). */
  void lock () { pthread_mutex_lock(&mutex); }

  /** Releases one level of ownership via \p pthread_mutex_unlock(). */
  void unlock () { pthread_mutex_unlock(&mutex); }

  /**
   * Scope guard: locks a \p recursive_mutex on construction or
   * \p acquire() and unlocks it on \p release() or destruction.
   */
  class scoped_lock
  {
  public:
    /** Creates a guard that holds nothing yet. */
    scoped_lock () : rmutex(NULL) {}

    /** Creates a guard and immediately locks \p in_rmutex. */
    explicit scoped_lock ( recursive_mutex& in_rmutex ) : rmutex(&in_rmutex) { rmutex->lock(); }

    ~scoped_lock () { release(); }

    /**
     * Locks \p in_rmutex and remembers it.  A mutex this guard
     * already holds is *not* unlocked first; call \p release()
     * before re-using a guard.
     */
    void acquire ( recursive_mutex& in_rmutex ) { rmutex = &in_rmutex; rmutex->lock(); }

    /** Unlocks the held mutex, if any; harmless when nothing is held. */
    void release () { if(rmutex) rmutex->unlock(); rmutex = NULL; }

  private:
    recursive_mutex * rmutex; // NULL while nothing is held
  };

private:
  pthread_mutex_t mutex;
};

//-------------------------------------------------------------------
/**
 * Value of type \p T whose reads and updates are serialised by a
 * private \p spin_mutex, so only one thread at a time touches it.
 * The operators follow the usual built-in conventions: prefix and
 * compound forms return the updated value, postfix forms return
 * the value held *before* the update.
 */
template <typename T>
class atomic
{
public:
  /** Starts out holding zero. */
  atomic () : val(0) {}

  /**
   * Copies the value only.  Every object gets its own freshly
   * initialised mutex; the pthread mutex of \p other is never copied.
   */
  atomic (const atomic& other) : val(other.load()), smutex() {}

  /** Returns a snapshot of the current value, taken under the lock. */
  operator T () const { return load(); }

  /** Stores \p value and returns the stored value. */
  T operator=( T value )
  {
    spin_mutex::scoped_lock lock(smutex);
    val = value;
    return val;
  }

  /** Stores the current value of \p value and returns \p *this. */
  atomic& operator=( const atomic& value )
  {
    // Read the source first, then lock ourselves: the two locks are
    // never held together, so "a = b" racing with "b = a" cannot
    // deadlock, and self-assignment needs no special case.
    const T incoming = value.load();
    spin_mutex::scoped_lock lock(smutex);
    val = incoming;
    return *this;
  }

  /** Adds \p value; returns the updated value. */
  T operator+=(T value)
  {
    spin_mutex::scoped_lock lock(smutex);
    val += value;
    return val;
  }

  /** Subtracts \p value; returns the updated value. */
  T operator-=(T value)
  {
    spin_mutex::scoped_lock lock(smutex);
    val -= value;
    return val;
  }

  /** Prefix increment; returns the updated value. */
  T operator++()
  {
    spin_mutex::scoped_lock lock(smutex);
    ++val;
    return val;
  }

  /** Postfix increment; returns the value held before the increment. */
  T operator++(int)
  {
    spin_mutex::scoped_lock lock(smutex);
    const T previous = val;
    ++val;
    return previous;
  }

  /** Prefix decrement; returns the updated value. */
  T operator--()
  {
    spin_mutex::scoped_lock lock(smutex);
    --val;
    return val;
  }

  /** Postfix decrement; returns the value held before the decrement. */
  T operator--(int)
  {
    spin_mutex::scoped_lock lock(smutex);
    const T previous = val;
    --val;
    return previous;
  }

private:
  /** Reads \p val while holding the lock. */
  T load () const
  {
    spin_mutex::scoped_lock lock(smutex);
    return val;
  }

  T val;
  mutable spin_mutex smutex; // mutable so that const reads can lock it
};
(The user can also force certain compiler +# flags/libs to be tested by setting these environment variables.) +# +# Also sets PTHREAD_CC to any special C compiler that is needed for +# multi-threaded programs (defaults to the value of CC otherwise). (This +# is necessary on AIX to use the special cc_r compiler alias.) +# +# NOTE: You are assumed to not only compile your program with these flags, +# but also link it with them as well. e.g. you should link with +# $PTHREAD_CC $CFLAGS $PTHREAD_CFLAGS $LDFLAGS ... $PTHREAD_LIBS $LIBS +# +# If you are only building threads programs, you may wish to use these +# variables in your default LIBS, CFLAGS, and CC: +# +# LIBS="$PTHREAD_LIBS $LIBS" +# CFLAGS="$CFLAGS $PTHREAD_CFLAGS" +# CC="$PTHREAD_CC" +# +# In addition, if the PTHREAD_CREATE_JOINABLE thread-attribute constant +# has a nonstandard name, defines PTHREAD_CREATE_JOINABLE to that name +# (e.g. PTHREAD_CREATE_UNDETACHED on AIX). +# +# Also HAVE_PTHREAD_PRIO_INHERIT is defined if pthread is found and the +# PTHREAD_PRIO_INHERIT symbol is defined when compiling with +# PTHREAD_CFLAGS. +# +# ACTION-IF-FOUND is a list of shell commands to run if a threads library +# is found, and ACTION-IF-NOT-FOUND is a list of commands to run it if it +# is not found. If ACTION-IF-FOUND is not specified, the default action +# will define HAVE_PTHREAD. +# +# Please let the authors know if this macro fails on any platform, or if +# you have any other suggestions or comments. This macro was based on work +# by SGJ on autoconf scripts for FFTW (http://www.fftw.org/) (with help +# from M. Frigo), as well as ac_pthread and hb_pthread macros posted by +# Alejandro Forero Cuervo to the autoconf macro repository. We are also +# grateful for the helpful feedback of numerous users. +# +# Updated for Autoconf 2.68 by Daniel Richard G. +# +# LICENSE +# +# Copyright (c) 2008 Steven G. Johnson +# Copyright (c) 2011 Daniel Richard G. 
+# +# This program is free software: you can redistribute it and/or modify it +# under the terms of the GNU General Public License as published by the +# Free Software Foundation, either version 3 of the License, or (at your +# option) any later version. +# +# This program is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General +# Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program. If not, see . +# +# As a special exception, the respective Autoconf Macro's copyright owner +# gives unlimited permission to copy, distribute and modify the configure +# scripts that are the output of Autoconf when processing the Macro. You +# need not follow the terms of the GNU General Public License when using +# or distributing such scripts, even though portions of the text of the +# Macro appear in them. The GNU General Public License (GPL) does govern +# all other use of the material that constitutes the Autoconf Macro. +# +# This special exception to the GPL applies to versions of the Autoconf +# Macro released by the Autoconf Archive. When you make and distribute a +# modified version of the Autoconf Macro, you may extend this special +# exception to the GPL to apply to your modified version as well. + +#serial 20 + +AU_ALIAS([ACX_PTHREAD], [AX_PTHREAD]) +AC_DEFUN([AX_PTHREAD], [ +AC_REQUIRE([AC_CANONICAL_HOST]) +AC_LANG_PUSH([C]) +ax_pthread_ok=no + +# We used to check for pthread.h first, but this fails if pthread.h +# requires special compiler flags (e.g. on True64 or Sequent). +# It gets checked for in the link test anyway. 
+ +# First of all, check if the user has set any of the PTHREAD_LIBS, +# etcetera environment variables, and if threads linking works using +# them: +if test x"$PTHREAD_LIBS$PTHREAD_CFLAGS" != x; then + save_CFLAGS="$CFLAGS" + CFLAGS="$CFLAGS $PTHREAD_CFLAGS" + save_LIBS="$LIBS" + LIBS="$PTHREAD_LIBS $LIBS" + AC_MSG_CHECKING([for pthread_join in LIBS=$PTHREAD_LIBS with CFLAGS=$PTHREAD_CFLAGS]) + AC_TRY_LINK_FUNC(pthread_join, ax_pthread_ok=yes) + AC_MSG_RESULT($ax_pthread_ok) + if test x"$ax_pthread_ok" = xno; then + PTHREAD_LIBS="" + PTHREAD_CFLAGS="" + fi + LIBS="$save_LIBS" + CFLAGS="$save_CFLAGS" +fi + +# We must check for the threads library under a number of different +# names; the ordering is very important because some systems +# (e.g. DEC) have both -lpthread and -lpthreads, where one of the +# libraries is broken (non-POSIX). + +# Create a list of thread flags to try. Items starting with a "-" are +# C compiler flags, and other items are library names, except for "none" +# which indicates that we try without any flags at all, and "pthread-config" +# which is a program returning the flags for the Pth emulation library. + +ax_pthread_flags="pthreads none -Kthread -kthread lthread -pthread -pthreads -mthreads pthread --thread-safe -mt pthread-config" + +# The ordering *is* (sometimes) important. 
Some notes on the +# individual items follow: + +# pthreads: AIX (must check this before -lpthread) +# none: in case threads are in libc; should be tried before -Kthread and +# other compiler flags to prevent continual compiler warnings +# -Kthread: Sequent (threads in libc, but -Kthread needed for pthread.h) +# -kthread: FreeBSD kernel threads (preferred to -pthread since SMP-able) +# lthread: LinuxThreads port on FreeBSD (also preferred to -pthread) +# -pthread: Linux/gcc (kernel threads), BSD/gcc (userland threads) +# -pthreads: Solaris/gcc +# -mthreads: Mingw32/gcc, Lynx/gcc +# -mt: Sun Workshop C (may only link SunOS threads [-lthread], but it +# doesn't hurt to check since this sometimes defines pthreads too; +# also defines -D_REENTRANT) +# ... -mt is also the pthreads flag for HP/aCC +# pthread: Linux, etcetera +# --thread-safe: KAI C++ +# pthread-config: use pthread-config program (for GNU Pth library) + +case ${host_os} in + solaris*) + + # On Solaris (at least, for some versions), libc contains stubbed + # (non-functional) versions of the pthreads routines, so link-based + # tests will erroneously succeed. (We need to link with -pthreads/-mt/ + # -lpthread.) (The stubs are missing pthread_cleanup_push, or rather + # a function called by this macro, so we could check for that, but + # who knows whether they'll stub that too in a future libc.) 
So, + # we'll just look for -pthreads and -lpthread first: + + ax_pthread_flags="-pthreads pthread -mt -pthread $ax_pthread_flags" + ;; + + darwin*) + ax_pthread_flags="-pthread $ax_pthread_flags" + ;; +esac + +if test x"$ax_pthread_ok" = xno; then +for flag in $ax_pthread_flags; do + + case $flag in + none) + AC_MSG_CHECKING([whether pthreads work without any flags]) + ;; + + -*) + AC_MSG_CHECKING([whether pthreads work with $flag]) + PTHREAD_CFLAGS="$flag" + ;; + + pthread-config) + AC_CHECK_PROG(ax_pthread_config, pthread-config, yes, no) + if test x"$ax_pthread_config" = xno; then continue; fi + PTHREAD_CFLAGS="`pthread-config --cflags`" + PTHREAD_LIBS="`pthread-config --ldflags` `pthread-config --libs`" + ;; + + *) + AC_MSG_CHECKING([for the pthreads library -l$flag]) + PTHREAD_LIBS="-l$flag" + ;; + esac + + save_LIBS="$LIBS" + save_CFLAGS="$CFLAGS" + LIBS="$PTHREAD_LIBS $LIBS" + CFLAGS="$CFLAGS $PTHREAD_CFLAGS" + + # Check for various functions. We must include pthread.h, + # since some functions may be macros. (On the Sequent, we + # need a special flag -Kthread to make this header compile.) + # We check for pthread_join because it is in -lpthread on IRIX + # while pthread_create is in libc. We check for pthread_attr_init + # due to DEC craziness with -lpthreads. We check for + # pthread_cleanup_push because it is one of the few pthread + # functions on Solaris that doesn't have a non-functional libc stub. + # We try pthread_create on general principles. 
+ AC_LINK_IFELSE([AC_LANG_PROGRAM([#include + static void routine(void *a) { a = 0; } + static void *start_routine(void *a) { return a; }], + [pthread_t th; pthread_attr_t attr; + pthread_create(&th, 0, start_routine, 0); + pthread_join(th, 0); + pthread_attr_init(&attr); + pthread_cleanup_push(routine, 0); + pthread_cleanup_pop(0) /* ; */])], + [ax_pthread_ok=yes], + []) + + LIBS="$save_LIBS" + CFLAGS="$save_CFLAGS" + + AC_MSG_RESULT($ax_pthread_ok) + if test "x$ax_pthread_ok" = xyes; then + break; + fi + + PTHREAD_LIBS="" + PTHREAD_CFLAGS="" +done +fi + +# Various other checks: +if test "x$ax_pthread_ok" = xyes; then + save_LIBS="$LIBS" + LIBS="$PTHREAD_LIBS $LIBS" + save_CFLAGS="$CFLAGS" + CFLAGS="$CFLAGS $PTHREAD_CFLAGS" + + # Detect AIX lossage: JOINABLE attribute is called UNDETACHED. + AC_MSG_CHECKING([for joinable pthread attribute]) + attr_name=unknown + for attr in PTHREAD_CREATE_JOINABLE PTHREAD_CREATE_UNDETACHED; do + AC_LINK_IFELSE([AC_LANG_PROGRAM([#include ], + [int attr = $attr; return attr /* ; */])], + [attr_name=$attr; break], + []) + done + AC_MSG_RESULT($attr_name) + if test "$attr_name" != PTHREAD_CREATE_JOINABLE; then + AC_DEFINE_UNQUOTED(PTHREAD_CREATE_JOINABLE, $attr_name, + [Define to necessary symbol if this constant + uses a non-standard name on your system.]) + fi + + AC_MSG_CHECKING([if more special flags are required for pthreads]) + flag=no + case ${host_os} in + aix* | freebsd* | darwin*) flag="-D_THREAD_SAFE";; + osf* | hpux*) flag="-D_REENTRANT";; + solaris*) + if test "$GCC" = "yes"; then + flag="-D_REENTRANT" + else + flag="-mt -D_REENTRANT" + fi + ;; + esac + AC_MSG_RESULT(${flag}) + if test "x$flag" != xno; then + PTHREAD_CFLAGS="$flag $PTHREAD_CFLAGS" + fi + + AC_CACHE_CHECK([for PTHREAD_PRIO_INHERIT], + ax_cv_PTHREAD_PRIO_INHERIT, [ + AC_LINK_IFELSE([ + AC_LANG_PROGRAM([[#include ]], [[int i = PTHREAD_PRIO_INHERIT;]])], + [ax_cv_PTHREAD_PRIO_INHERIT=yes], + [ax_cv_PTHREAD_PRIO_INHERIT=no]) + ]) + AS_IF([test 
"x$ax_cv_PTHREAD_PRIO_INHERIT" = "xyes"], + AC_DEFINE([HAVE_PTHREAD_PRIO_INHERIT], 1, [Have PTHREAD_PRIO_INHERIT.])) + + LIBS="$save_LIBS" + CFLAGS="$save_CFLAGS" + + # More AIX lossage: compile with *_r variant + if test "x$GCC" != xyes; then + case $host_os in + aix*) + AS_CASE(["x/$CC"], + [x*/c89|x*/c89_128|x*/c99|x*/c99_128|x*/cc|x*/cc128|x*/xlc|x*/xlc_v6|x*/xlc128|x*/xlc128_v6], + [#handle absolute path differently from PATH based program lookup + AS_CASE(["x$CC"], + [x/*], + [AS_IF([AS_EXECUTABLE_P([${CC}_r])],[PTHREAD_CC="${CC}_r"])], + [AC_CHECK_PROGS([PTHREAD_CC],[${CC}_r],[$CC])])]) + ;; + esac + fi +fi + +test -n "$PTHREAD_CC" || PTHREAD_CC="$CC" + +AC_SUBST(PTHREAD_LIBS) +AC_SUBST(PTHREAD_CFLAGS) +AC_SUBST(PTHREAD_CC) + +# Finally, execute ACTION-IF-FOUND/ACTION-IF-NOT-FOUND: +if test x"$ax_pthread_ok" = xyes; then + ifelse([$1],,AC_DEFINE(HAVE_PTHREAD,1,[Define if you have POSIX threads libraries and header files.]),[$1]) + : +else + ax_pthread_ok=no + $2 +fi +AC_LANG_POP +])dnl AX_PTHREAD diff --git a/src/mesh/mesh_tools.C b/src/mesh/mesh_tools.C index 05a36c9b197..8e7029a0f29 100644 --- a/src/mesh/mesh_tools.C +++ b/src/mesh/mesh_tools.C @@ -77,7 +77,7 @@ namespace { // If we don't have threads we never need a join, and icpc yells a // warning if it sees an anonymous function that's never used -#ifdef LIBMESH_HAVE_TBB_API +#if defined(LIBMESH_HAVE_PTHREAD) || defined(LIBMESH_HAVE_TBB_API) void join (const SumElemWeight &other) { _weight += other.weight(); } #endif @@ -146,7 +146,7 @@ namespace { // If we don't have threads we never need a join, and icpc yells a // warning if it sees an anonymous function that's never used -#ifdef LIBMESH_HAVE_TBB_API +#if defined(LIBMESH_HAVE_PTHREAD) || defined(LIBMESH_HAVE_TBB_API) void join (const FindBBox &other) { for (unsigned int i=0; i