Skip to content

Commit

Permalink
Merge branch 'develop'
Browse files Browse the repository at this point in the history
  • Loading branch information
Rob Patro committed Jul 4, 2020
2 parents 91091fc + 037106a commit 642dc19
Show file tree
Hide file tree
Showing 60 changed files with 7,156 additions and 1,321 deletions.
47 changes: 29 additions & 18 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,7 @@ message(STATUS "CMAKE_BUILD_TYPE = ${CMAKE_BUILD_TYPE}")
## Set the standard required compile flags
# Nov 18th --- removed -DHAVE_CONFIG_H
set(REMOVE_WARNING_FLAGS "-Wno-unused-function;-Wno-unused-local-typedefs")
set(TGT_COMPILE_FLAGS "-ftree-vectorize;-funroll-loops;-fPIC;-fomit-frame-pointer;-O3;-DNDEBUG;-DSTX_NO_STD_STRING_VIEW")
set(TGT_COMPILE_FLAGS "-ftree-vectorize;-funroll-loops;-fPIC;-fomit-frame-pointer;-O3;-DNDEBUG;-DSTX_NO_STD_STRING_VIEW;-D__STDC_FORMAT_MACROS")
set(TGT_WARN_FLAGS "-Wall;-Wno-unknown-pragmas;-Wno-reorder;-Wno-unused-variable;-Wreturn-type;-Werror=return-type;${REMOVE_WARNING_FLAGS}")
#set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fsanitize=address")
#set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fsanitize=address")
Expand Down Expand Up @@ -596,10 +596,15 @@ if (NOT CEREAL_FOUND)
endif()

## Try and find TBB first
find_package(TBB 2018.0 COMPONENTS tbb tbbmalloc tbbmalloc_proxy)
find_package(TBB 2019.0 COMPONENTS tbb tbbmalloc tbbmalloc_proxy)

## NOTE: we actually require TBB 2019 U4 or newer, since we
## are using tbb::global_control. However, the TBB sources do
## not seem to tag minor (update) version numbers, so the check
## below can only be made against 2019.0. Check before release
## if we can bump to the 2020 version (requires having tbb 2020
## for OSX).
if (${TBB_FOUND})
if (${TBB_VERSION} VERSION_GREATER_EQUAL 2018.0)
if (${TBB_VERSION} VERSION_GREATER_EQUAL 2019.0)
message("FOUND SUITABLE TBB VERSION : ${TBB_VERSION}")
set(TBB_TARGET_EXISTED TRUE)
else()
Expand Down Expand Up @@ -627,7 +632,7 @@ endif()
message("Build system will fetch and build Intel Threading Building Blocks")
message("==================================================================")
# These are useful for the custom install step we'll do later
set(TBB_SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/external/oneTBB-2019_U8)
set(TBB_SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/external/oneTBB-2020.2)
set(TBB_INSTALL_DIR ${CMAKE_CURRENT_SOURCE_DIR}/external/install)

if("${TBB_COMPILER}" STREQUAL "gcc")
Expand All @@ -640,10 +645,10 @@ set(TBB_CXXFLAGS "${TBB_CXXFLAGS} ${CXXSTDFLAG}")

externalproject_add(libtbb
DOWNLOAD_DIR ${CMAKE_CURRENT_SOURCE_DIR}/external
DOWNLOAD_COMMAND curl -k -L https://github.com/intel/tbb/archive/2019_U8.tar.gz -o tbb-2019_U8.tgz &&
${SHASUM} 6b540118cbc79f9cbc06a35033c18156c21b84ab7b6cf56d773b168ad2b68566 tbb-2019_U8.tgz &&
tar -xzvf tbb-2019_U8.tgz
SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/external/oneTBB-2019_U8
DOWNLOAD_COMMAND curl -k -L https://github.com/oneapi-src/oneTBB/archive/v2020.2.tar.gz -o tbb-2020_U2.tgz &&
${SHASUM} 4804320e1e6cbe3a5421997b52199e3c1a3829b2ecb6489641da4b8e32faf500 tbb-2020_U2.tgz &&
tar -xzvf tbb-2020_U2.tgz
SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/external/oneTBB-2020.2
INSTALL_DIR ${CMAKE_CURRENT_SOURCE_DIR}/external/install
PATCH_COMMAND "${TBB_PATCH_STEP}"
CONFIGURE_COMMAND ""
Expand All @@ -652,9 +657,6 @@ externalproject_add(libtbb
BUILD_IN_SOURCE 1
)




set(RECONFIG_FLAGS ${RECONFIG_FLAGS} -DTBB_WILL_RECONFIGURE=FALSE -DTBB_RECONFIGURE=TRUE)
externalproject_add_step(libtbb reconfigure
COMMAND ${CMAKE_COMMAND} ${CMAKE_CURRENT_SOURCE_DIR} ${RECONFIG_FLAGS}
Expand Down Expand Up @@ -721,33 +723,42 @@ message("TBB_LIBRARIES = ${TBB_LIBRARIES}")
#message("TBB_LIBRARY_DIRS ${TBB_LIBRARY_DIRS}")
#message("TBB_LIBRARIES ${TBB_LIBRARIES} ")

find_package(libgff)
if(NOT LIBGFF_FOUND)
find_package(libgff 2.0.0
HINTS ${LIB_GFF_PATH} ${GFF_ROOT}
)
if(libgff_FOUND)
message(STATUS "libgff ver. ${LIB_GFF_VERSION} found.")
message(STATUS " include: ${LIB_GFF_INCLUDE_DIR}")
message(STATUS " lib : ${LIB_GFF_LIBRARY_DIR}")
endif()

if(NOT libgff_FOUND)
message("Build system will compile libgff")
message("==================================================================")
externalproject_add(libgff
DOWNLOAD_DIR ${CMAKE_CURRENT_SOURCE_DIR}/external
DOWNLOAD_COMMAND curl -k -L https://github.com/COMBINE-lab/libgff/archive/v1.2.tar.gz -o libgff.tgz &&
${SHASUM} bfabf143da828e8db251104341b934458c19d3e3c592d418d228de42becf98eb libgff.tgz &&
DOWNLOAD_COMMAND curl -k -L https://github.com/COMBINE-lab/libgff/archive/v2.0.0.tar.gz -o libgff.tgz &&
${SHASUM} 7656b19459a7ca7d2fd0fcec4f2e0fd0deec1b4f39c703a114e8f4c22d82a99c libgff.tgz &&
tar -xzvf libgff.tgz
##
#URL https://github.com/COMBINE-lab/libgff/archive/v1.1.tar.gz
#DOWNLOAD_NAME libff.tgz
#URL_HASH SHA1=37b3147d78391d1fabbe6a0df313fbf516abbc6f
#TLS_VERIFY FALSE
##
SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/external/libgff-1.2
SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/external/libgff-2.0.0
#UPDATE_COMMAND sh -c "mkdir -p <SOURCE_DIR>/build"
INSTALL_DIR ${CMAKE_CURRENT_SOURCE_DIR}/external/install
BINARY_DIR ${CMAKE_CURRENT_SOURCE_DIR}/external/libgff-1.2/build
CMAKE_ARGS -DCMAKE_INSTALL_PREFIX:PATH=${CMAKE_CURRENT_SOURCE_DIR}/external/install
BINARY_DIR ${CMAKE_CURRENT_SOURCE_DIR}/external/libgff-2.0.0/build
CMAKE_ARGS -DCMAKE_INSTALL_PREFIX:PATH=<INSTALL_DIR> -DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER} -DCMAKE_C_COMPILER=${CMAKE_C_COMPILER}
)
# Create the out-of-source build directory for libgff after the download
# step and before the configure step. CMake's own `-E make_directory` is
# used instead of a raw `mkdir -p` so the step does not depend on a POSIX
# shell utility being available; like `mkdir -p`, it creates missing parent
# directories and succeeds if the directory already exists.
externalproject_add_step(libgff makedir
  COMMAND ${CMAKE_COMMAND} -E make_directory <SOURCE_DIR>/build
  COMMENT "Make build directory"
  DEPENDEES download
  DEPENDERS configure)
set(FETCHED_GFF TRUE)
set(LIB_GFF_PATH ${CMAKE_CURRENT_SOURCE_DIR}/external/install)
endif()

# Because of the way that Apple has changed SIP
Expand Down
2 changes: 0 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -36,8 +36,6 @@ Give salmon a try! You can find the latest binary releases [here](https://githu

The current version number of the master branch of Salmon can be found [**here**](http://combine-lab.github.io/salmon/version_info/latest)

**NOTE**: Salmon works by (quasi)-mapping sequencing reads directly to the *transcriptome*. This means the Salmon index should be built on a set of target transcripts, **not** on the *genome* of the underlying organism. If indexing appears to be taking a very long time, or using a tremendous amount of memory (which it should not), please ensure that you are not attempting to build an index on the genome of your organism!

Documentation
==============

Expand Down
4 changes: 2 additions & 2 deletions current_version.txt
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
VERSION_MAJOR 1
VERSION_MINOR 2
VERSION_PATCH 1
VERSION_MINOR 3
VERSION_PATCH 0
4 changes: 2 additions & 2 deletions doc/source/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,9 +55,9 @@
# built documents.
#
# The short X.Y version.
version = '1.2'
version = '1.3'
# The full version, including alpha/beta/rc tags.
release = '1.2.1'
release = '1.3.0'

# The language for content autogenerated by Sphinx. Refer to documentation
# for a list of supported languages.
Expand Down
2 changes: 1 addition & 1 deletion docker/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ MAINTAINER [email protected]

ENV PACKAGES git gcc make g++ libboost-all-dev liblzma-dev libbz2-dev \
ca-certificates zlib1g-dev libcurl4-openssl-dev curl unzip autoconf apt-transport-https ca-certificates gnupg software-properties-common wget
ENV SALMON_VERSION 1.2.1
ENV SALMON_VERSION 1.3.0

# salmon binary will be installed in /home/salmon/bin/salmon

Expand Down
2 changes: 1 addition & 1 deletion docker/build_test.sh
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
#! /bin/bash
SALMON_VERSION=1.2.1
SALMON_VERSION=1.3.0
docker build --no-cache -t combinelab/salmon:${SALMON_VERSION} -t combinelab/salmon:latest .
5 changes: 5 additions & 0 deletions include/AlevinOpts.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,9 @@ struct AlevinOpts {
bool dumpBFH;
// dump per cell level umi-graph
bool dumpUmiGraph;
// dump per cell level de-duplicated
// equivalence class
bool dumpCellEq;
// Stop progress dumps
bool quiet;
//flag for deduplication
Expand Down Expand Up @@ -80,6 +83,8 @@ struct AlevinOpts {
uint32_t maxNumBarcodes;
// number of bootstraps to perform
uint32_t numBootstraps;
// number of gibbs samples to perform
uint32_t numGibbsSamples;
// force the number of cells
uint32_t forceCells;
// define a close upper bound on expected number of cells
Expand Down
44 changes: 43 additions & 1 deletion include/AlignmentLibrary.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -27,9 +27,10 @@ extern "C" {
#include "SalmonOpts.hpp"
#include "SalmonUtils.hpp"
#include "SimplePosBias.hpp"
#include "SpinLock.hpp" // RapMap's with try_lock
#include "SpinLock.hpp" // From pufferfish, with try_lock
#include "Transcript.hpp"
#include "concurrentqueue.h"
#include "parallel_hashmap/phmap.h"

// Boost includes
#include <boost/filesystem.hpp>
Expand All @@ -38,6 +39,7 @@ extern "C" {
#include <functional>
#include <memory>
#include <vector>
#include <stdexcept>

template <typename T> class NullFragmentFilter;

Expand Down Expand Up @@ -109,8 +111,48 @@ template <typename FragT, typename EQBuilderT> class AlignmentLibrary {
// Figure out aligner information from the header if we can
aligner_ = salmon::bam_utils::inferAlignerFromHeader(header);

// Collect the names of any references that the header tags as decoys.
// The scan only runs when the header identifies pufferfish as the aligner
// (see the aligner_ check below); otherwise `decoys` stays empty.
phmap::flat_hash_set<std::string> decoys;
if (aligner_ == salmon::bam_utils::AlignerDetails::PUFFERFISH) {
// for each reference sequence listed in the header
for (decltype(header->nref) i = 0; i < header->nref; ++i) {
// walk this reference's linked list of header tags
SAM_hdr_tag *tag;
for (tag = header->ref[i].tag; tag; tag = tag->next) {
// a tag that is exactly the 4 characters "DS:D" marks the reference as a decoy
if ((tag->len == 4) and (std::strncmp(tag->str, "DS:D", 4) == 0)) {
decoys.insert(header->ref[i].name);
// one decoy tag is enough; skip the remaining tags of this reference
break;
} // end if decoy tag

} // end for each tag
} // end for each reference
}

// Quantifying in alignment-mode from a SAM/BAM file whose header contains
// decoy references is not supported. If any were found: stop parsing,
// report the problem through the joint log, and abort by throwing
// std::runtime_error.
if (!decoys.empty()) {
  // Stop parsing and release `bq` before the exception propagates.
  bq->forceEndParsing();
  bq.reset();
  salmonOpts.jointLog->error(
      "Salmon is being run in alignment-mode with a SAM/BAM file that contains decoy\n"
      "sequences (marked as such during salmon indexing). This SAM/BAM file had {}\n"
      "such sequences tagged in the header. Since alignments to decoys are not\n"
      "intended for decoy-level quantification, this functionality is not currently\n"
      "supported. If you wish to run salmon with this SAM/BAM file, please \n"
      "filter out / remove decoy transcripts (those tagged with `DS:D`) from the \n"
      "header, and all SAM/BAM records that represent alignments to decoys \n"
      "(those tagged with `XT:A:D`). If you believe you are receiving this message\n"
      "in error, please report this issue on GitHub.", decoys.size());
  // Flush so the explanation reaches the log sinks before we unwind.
  salmonOpts.jointLog->flush();
  // The exception text is a fixed literal, so it is passed straight to
  // std::runtime_error; no string stream is needed to build it.
  throw std::runtime_error(
      "\nCannot quantify from SAM/BAM file containing decoy transcripts or alignment records!\n");
}

// The transcript file existed, so load up the transcripts
double alpha = 0.005;
// we know how many we will have, so reserve the space for
// them.
transcripts_.reserve(header->nref);
for (decltype(header->nref) i = 0; i < header->nref; ++i) {
transcripts_.emplace_back(i, header->ref[i].name, header->ref[i].len,
alpha);
Expand Down
95 changes: 38 additions & 57 deletions include/AtomicMatrix.hpp
Original file line number Diff line number Diff line change
@@ -1,42 +1,53 @@
#ifndef ATOMIC_MATRIX
#define ATOMIC_MATRIX

#include "tbb/atomic.h"
#include "tbb/concurrent_vector.h"

#include "SalmonMath.hpp"
#include "SalmonUtils.hpp"

#include <vector>

template <typename T> class AtomicMatrix {
public:
AtomicMatrix() {
nRow_ = 0;
nCol_ = 0;
alpha_ = salmon::math::LOG_0;
logSpace_ = true;
}

AtomicMatrix(size_t nRow, size_t nCol, T alpha, bool logSpace = true)
: storage_(nRow * nCol, logSpace ? std::log(alpha) : alpha),
rowsums_(nRow, logSpace ? std::log(nCol * alpha) : nCol * alpha),
nRow_(nRow), nCol_(nCol), alpha_(alpha), logSpace_(logSpace) {}
: nRow_(nRow), nCol_(nCol), alpha_(alpha), logSpace_(logSpace) {

decltype(storage_) storage_tmp(nRow * nCol);
std::swap(storage_, storage_tmp);
T e = logSpace ? std::log(alpha) : alpha;
std::fill(storage_.begin(), storage_.end(), e);

decltype(rowsums_) rowsums_tmp(nRow);
std::swap(rowsums_, rowsums_tmp);
T ers = logSpace ? std::log(nCol * alpha) : nCol * alpha;
std::fill(rowsums_.begin(), rowsums_.end(), ers);
}

// Move-assignment: exchanges the element and row-sum buffers with `o`
// and copies o's dimensions, alpha and log-space flag into this matrix.
// Afterwards `o` owns this matrix's previous buffers; o's scalar fields
// are left unchanged.
AtomicMatrix& operator=(AtomicMatrix&& o) {
  storage_.swap(o.storage_);
  rowsums_.swap(o.rowsums_);
  logSpace_ = o.logSpace_;
  alpha_ = o.alpha_;
  nCol_ = o.nCol_;
  nRow_ = o.nRow_;
  return *this;
}

void incrementUnnormalized(size_t rowInd, size_t colInd, T amt) {
using salmon::math::logAdd;
size_t k = rowInd * nCol_ + colInd;
if (logSpace_) {
T oldVal = storage_[k];
T retVal = oldVal;
T newVal = logAdd(oldVal, amt);
do {
oldVal = retVal;
newVal = logAdd(oldVal, amt);
retVal = storage_[k].compare_and_swap(newVal, oldVal);
} while (retVal != oldVal);

salmon::utils::incLoopLog(storage_[k], amt);
} else {
T oldVal = storage_[k];
T retVal = oldVal;
T newVal = oldVal + amt;
do {
oldVal = retVal;
newVal = oldVal + amt;
retVal = storage_[k].compare_and_swap(newVal, oldVal);
} while (retVal != oldVal);
salmon::utils::incLoop(storage_[k], amt);
}
}

Expand All @@ -55,41 +66,11 @@ template <typename T> class AtomicMatrix {
using salmon::math::logAdd;
size_t k = rowInd * nCol_ + colInd;
if (logSpace_) {
T oldVal = storage_[k];
T retVal = oldVal;
T newVal = logAdd(oldVal, amt);
do {
oldVal = retVal;
newVal = logAdd(oldVal, amt);
retVal = storage_[k].compare_and_swap(newVal, oldVal);
} while (retVal != oldVal);

oldVal = rowsums_[rowInd];
retVal = oldVal;
newVal = logAdd(oldVal, amt);
do {
oldVal = retVal;
newVal = logAdd(oldVal, amt);
retVal = rowsums_[rowInd].compare_and_swap(newVal, oldVal);
} while (retVal != oldVal);
salmon::utils::incLoopLog(storage_[k], amt);
salmon::utils::incLoopLog(rowsums_[rowInd], amt);
} else {
T oldVal = storage_[k];
T retVal = oldVal;
T newVal = oldVal + amt;
do {
oldVal = retVal;
newVal = oldVal + amt;
retVal = storage_[k].compare_and_swap(newVal, oldVal);
} while (retVal != oldVal);

oldVal = rowsums_[rowInd];
retVal = oldVal;
newVal = oldVal + amt;
do {
oldVal = retVal;
newVal = oldVal + amt;
retVal = rowsums_[rowInd].compare_and_swap(newVal, oldVal);
} while (retVal != oldVal);
salmon::utils::incLoop(storage_[k], amt);
salmon::utils::incLoop(rowsums_[rowInd], amt);
}
}

Expand All @@ -106,8 +87,8 @@ template <typename T> class AtomicMatrix {
size_t nCol() const { return nCol_; }

private:
std::vector<tbb::atomic<T>> storage_;
std::vector<tbb::atomic<T>> rowsums_;
std::vector<std::atomic<T>> storage_;
std::vector<std::atomic<T>> rowsums_;
size_t nRow_, nCol_;
T alpha_;
bool logSpace_;
Expand Down
1 change: 0 additions & 1 deletion include/BAMQueue.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,6 @@
#include <iostream>
#include <memory>
#include <mutex>
#include <tbb/atomic.h>
#include <thread>
#include <vector>

Expand Down
2 changes: 1 addition & 1 deletion include/BAMQueue.tpp
Original file line number Diff line number Diff line change
Expand Up @@ -116,7 +116,7 @@ void BAMQueue<FragT>::reset() {
template <typename FragT>
BAMQueue<FragT>::~BAMQueue() {
fmt::print(stderr, "\nFreeing memory used by read queue . . . ");
parsingThread_->join();
if (parsingThread_) { parsingThread_->join(); }
fmt::print(stderr, "\nJoined parsing thread . . . ");

for (auto& file : files_) {
Expand Down
Loading

0 comments on commit 642dc19

Please sign in to comment.