Merge branch 'develop'
Rob Patro committed Dec 19, 2019
2 parents df3e6ab + 74e6760; commit c021886
Showing 37 changed files with 765 additions and 423 deletions.
18 changes: 6 additions & 12 deletions CMakeLists.txt
@@ -1,4 +1,4 @@
-cmake_minimum_required(VERSION 3.9)
+cmake_minimum_required(VERSION 3.12)
 
 if(DEFINED ENV{CC})
   set(CC $ENV{CC})
@@ -543,20 +543,14 @@ if (NOT CEREAL_FOUND)
   include(ExternalProject)
   externalproject_add(libcereal
     DOWNLOAD_DIR ${CMAKE_CURRENT_SOURCE_DIR}/external
-    DOWNLOAD_COMMAND curl -k -L https://github.com/USCiLab/cereal/archive/v1.2.2.tar.gz -o cereal-v1.2.2.tar.gz &&
-      ${SHASUM} 1921f26d2e1daf9132da3c432e2fd02093ecaedf846e65d7679ddf868c7289c4 cereal-v1.2.2.tar.gz &&
-      tar -xzvf cereal-v1.2.2.tar.gz
+    DOWNLOAD_COMMAND curl -k -L https://github.com/USCiLab/cereal/archive/v1.3.0.tar.gz -o cereal-v1.3.0.tar.gz &&
+      ${SHASUM} 329ea3e3130b026c03a4acc50e168e7daff4e6e661bc6a7dfec0d77b570851d5 cereal-v1.3.0.tar.gz &&
+      tar -xzvf cereal-v1.3.0.tar.gz
 
-    ##
-    #URL https://github.com/USCiLab/cereal/archive/v1.2.2.tar.gz
-    #DOWNLOAD_NAME cereal-v1.2.2.tar.gz
-    #TLS_VERIFY FALSE
-    #URL_HASH SHA1=ffddf5fc5313cfbb893e07823ca8c473084eebca
-    ##
-    SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/external/cereal-1.2.2
+    SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/external/cereal-1.3.0
     INSTALL_DIR ${CMAKE_CURRENT_SOURCE_DIR}/external/install
     #UPDATE_COMMAND sh -c "mkdir -p <SOURCE_DIR>/build"
-    BINARY_DIR ${CMAKE_CURRENT_SOURCE_DIR}/external/cereal-1.2.2/build
+    BINARY_DIR ${CMAKE_CURRENT_SOURCE_DIR}/external/cereal-1.3.0/build
     CONFIGURE_COMMAND ""
    BUILD_COMMAND ""
     INSTALL_COMMAND sh -c "mkdir -p <INSTALL_DIR>/include && cp -r <SOURCE_DIR>/include/cereal <INSTALL_DIR>/include"
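The cereal dependency bumped above (v1.2.2 to v1.3.0) is a header-only C++ serialization library, which is why the install step only copies headers. For readers unfamiliar with it, here is a minimal sketch of the JSON name-value-pair style used elsewhere in this commit; the QuantSummary type and its fields are illustrative, not salmon code:

```cpp
// Minimal cereal JSON example; QuantSummary is a hypothetical type.
#include <cereal/archives/json.hpp>
#include <cereal/types/string.hpp>
#include <cereal/types/vector.hpp>
#include <iostream>
#include <string>
#include <vector>

struct QuantSummary {
  std::string version;
  std::vector<double> abundances;

  // cereal finds this member template and serializes each named field
  template <class Archive>
  void serialize(Archive& ar) {
    ar(cereal::make_nvp("version", version),
       cereal::make_nvp("abundances", abundances));
  }
};

int main() {
  QuantSummary s{"1.1.0", {12.5, 0.0, 3.25}};
  cereal::JSONOutputArchive oa(std::cout); // flushes the JSON document when destroyed
  oa(cereal::make_nvp("summary", s));
  return 0;
}
```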
5 changes: 4 additions & 1 deletion README.md
@@ -9,7 +9,8 @@

 ### Pre-computed decoy transcriptomes
 
-Although the precomputed decoys are still compatible with the latest major release (v1.0.0), we highly recommend updating your index using the full genome if memory permits, as it yields higher accuracy. For more information, please check our extensive benchmarking comparing different alignment methods and their influence on RNA-seq quantification in the latest revised preprint [manuscript](https://www.biorxiv.org/content/10.1101/657874v2).
+tl;dr: fast is good, but fast and accurate is better!
+Although the precomputed decoys (<= v0.14.2) are still compatible with the latest major release (v1.0.0), we highly recommend updating your index using the full genome, as it gives significantly higher accuracy. For more information, please check our extensive benchmarking comparing different alignment methods and their performance on RNA-seq quantification in the latest revised preprint [manuscript](https://www.biorxiv.org/content/10.1101/657874v2).
 Please use the [tutorial](https://combine-lab.github.io/alevin-tutorial/2019/selective-alignment/) for a step-by-step guide on how to efficiently index the reference transcriptome and genome for accurate gentrome-based RNA-seq quantification.
 
 **Facing problems with indexing? Check whether anyone else has already had this problem in the issues section, or fill out the index generation [request form](https://forms.gle/3baJc5SYrkSWb1z48)**
@@ -27,6 +28,8 @@ Give salmon a try! You can find the latest binary releases [here](https://githu

 The current version number of the master branch of Salmon can be found [**here**](http://combine-lab.github.io/salmon/version_info/latest)
 
+**NOTE**: Salmon works by (quasi)-mapping sequencing reads directly to the *transcriptome*. This means the Salmon index should be built on a set of target transcripts, **not** on the *genome* of the underlying organism. If indexing appears to be taking a very long time, or using a tremendous amount of memory (which it should not), please ensure that you are not attempting to build an index on the genome of your organism!
+
 Documentation
 ==============
 
4 changes: 2 additions & 2 deletions cmake/SimpleTest.cmake
@@ -7,7 +7,7 @@ if (TAR_RESULT)
   message(FATAL_ERROR "Error untarring sample_data.tgz")
 endif()
 
-set(INDEX_CMD ${TOPLEVEL_DIR}/build/src/sailfish index -t transcripts.fasta -k 20 -o sample_index --force)
+set(INDEX_CMD ${TOPLEVEL_DIR}/build/src/sailfish --no-version-check index -t transcripts.fasta -k 20 -o sample_index --force)
 execute_process(COMMAND ${INDEX_CMD}
   WORKING_DIRECTORY ${TOPLEVEL_DIR}/sample_data
   RESULT_VARIABLE INDEX_RESULT
@@ -17,7 +17,7 @@ if (INDEX_RESULT)
   message(FATAL_ERROR "Error running ${INDEX_COMMAND}")
 endif()
 
-set(QUANT_COMMAND ${TOPLEVEL_DIR}/build/src/sailfish quant -i sample_index --noBiasCorrect -l IU -1 reads_1.fastq -2 reads_2.fastq -o sample_quant)
+set(QUANT_COMMAND ${TOPLEVEL_DIR}/build/src/sailfish --no-version-check quant -i sample_index --noBiasCorrect -l IU -1 reads_1.fastq -2 reads_2.fastq -o sample_quant)
 execute_process(COMMAND ${QUANT_COMMAND}
   WORKING_DIRECTORY ${TOPLEVEL_DIR}/sample_data
   RESULT_VARIABLE QUANT_RESULT
2 changes: 1 addition & 1 deletion current_version.txt
@@ -1,3 +1,3 @@
 VERSION_MAJOR 1
-VERSION_MINOR 0
+VERSION_MINOR 1
 VERSION_PATCH 0
4 changes: 2 additions & 2 deletions doc/source/conf.py
@@ -55,9 +55,9 @@
 # built documents.
 #
 # The short X.Y version.
-version = '1.0'
+version = '1.1'
 # The full version, including alpha/beta/rc tags.
-release = '1.0.0'
+release = '1.1.0'
 
 # The language for content autogenerated by Sphinx. Refer to documentation
 # for a list of supported languages.
11 changes: 6 additions & 5 deletions doc/steps_to_prepare_release.md
@@ -1,14 +1,15 @@
-# Steps to prepare a release of Salmon
+# Steps to prepare a release of salmon
 -----
 
-1. Tag the corresponding commit of RapMap so that it can be stably pulled in for source builds.
-2. Alter `fetchRapMap.sh` to fetch the corresponding tagged version (and update the sha256 sum).
+1. Tag the corresponding commit of pufferfish so that it can be stably pulled in for source builds.
+2. Alter `fetchPufferfish.sh` to fetch the corresponding tagged version (and update the sha256 sum).
 3. Bump the salmon version in `include/SalmonConfig.hpp`, then rebuild and run the `bump_version.sh` script.
 4. Ensure that everything builds cleanly on Linux (taken care of by CI) and OSX.
 5. Merge the develop branch changes into master.
-6. Tag the Salmon release with a new version number.
+6. Tag the salmon release with a new version number.
 7. Update the docker tag and build an image for Docker Hub.
 8. Bump the Bioconda version and build a new Bioconda release.
 9. Add release notes for the tagged master version.
 10. Upload the pre-compiled Linux binary (from the CI server) to GitHub.
-11. (not technically part of the release) Reset the relevant changes (steps 1, 2) on the develop branch so they now point to a non-tagged RapMap.
+11. Place a new version file on the website and update the old one.
+12. (not technically part of the release) Reset the relevant changes (steps 1, 2) on the develop branch so they now point to a non-tagged pufferfish.
23 changes: 20 additions & 3 deletions docker/Dockerfile
@@ -4,9 +4,9 @@
 FROM ubuntu:18.04
 MAINTAINER [email protected]
 
-ENV PACKAGES git gcc make g++ cmake libboost-all-dev liblzma-dev libbz2-dev \
-    ca-certificates zlib1g-dev curl unzip autoconf
-ENV SALMON_VERSION 1.0.0
+ENV PACKAGES git gcc make g++ libboost-all-dev liblzma-dev libbz2-dev \
+    ca-certificates zlib1g-dev libcurl4-openssl-dev curl unzip autoconf apt-transport-https ca-certificates gnupg software-properties-common wget
+ENV SALMON_VERSION 1.1.0
 
 # salmon binary will be installed in /home/salmon/bin/salmon
 
@@ -15,9 +15,22 @@ ENV SALMON_VERSION 1.0.0
 WORKDIR /home
 
 RUN apt-get update && \
+    apt remove -y libcurl4 && \
     apt-get install -y --no-install-recommends ${PACKAGES} && \
     apt-get clean
 
+RUN wget -O - https://apt.kitware.com/keys/kitware-archive-latest.asc 2>/dev/null | apt-key add -
+
+RUN apt-add-repository 'deb https://apt.kitware.com/ubuntu/ bionic main'
+
+RUN apt-get update
+
+RUN apt-key --keyring /etc/apt/trusted.gpg del C1F34CDD40CD72DA
+
+RUN apt-get install kitware-archive-keyring
+
+RUN apt-get install -y cmake
+
 RUN curl -k -L https://github.com/COMBINE-lab/salmon/archive/v${SALMON_VERSION}.tar.gz -o salmon-v${SALMON_VERSION}.tar.gz && \
     tar xzf salmon-v${SALMON_VERSION}.tar.gz && \
     cd salmon-${SALMON_VERSION} && \
@@ -34,3 +47,7 @@ RUN curl -k -L https://github.com/COMBINE-lab/salmon/archive/v${SALMON_VERSION}.
 # cmake .. -DCMAKE_INSTALL_PREFIX=/usr/local && make && make install
 
 ENV PATH /home/salmon-${SALMON_VERSION}/bin:${PATH}
+ENV LD_LIBRARY_PATH "/usr/local/lib:${LD_LIBRARY_PATH}"
+
+RUN echo "export PATH=$PATH" > /etc/environment
+RUN echo "export LD_LIBRARY_PATH=$LD_LIBRARY_PATH" >> /etc/environment
2 changes: 1 addition & 1 deletion docker/build_test.sh
@@ -1,3 +1,3 @@
 #! /bin/bash
-SALMON_VERSION=1.0.0
+SALMON_VERSION=1.1.0
 docker build --no-cache -t combinelab/salmon:${SALMON_VERSION} -t combinelab/salmon:latest .
2 changes: 1 addition & 1 deletion include/AlevinHash.hpp
@@ -7,6 +7,7 @@
 #include "spdlog/spdlog.h"
 
 #include "AlevinOpts.hpp"
+#include "AlevinUtils.hpp"
 #include "SingleCellProtocols.hpp"
 #include "GZipWriter.hpp"
 #include "TranscriptGroup.hpp"
@@ -29,7 +30,6 @@ void alevinOptimize( std::vector<std::string>& trueBarcodesVec,

 template <typename ProtocolT>
 int salmonHashQuantify(AlevinOpts<ProtocolT>& aopt,
-                       bfs::path& indexDirectory,
                        bfs::path& outputDirectory,
                        CFreqMapT& freqCounter);
 #endif // __ALEVIN_HASH_HPP__
2 changes: 2 additions & 0 deletions include/AlevinOpts.hpp
@@ -27,6 +27,8 @@ struct AlevinOpts {
   protocolT protocol;
   //dump barcodes fq files
   bool dumpfq;
+  // dump Arborescence Fragment Counts
+  bool dumpArborescences;
   //dump CB features for whitelisting
   bool dumpfeatures;
   //eqclass level barcode count
7 changes: 5 additions & 2 deletions include/AlevinUtils.hpp
@@ -49,6 +49,7 @@
   namespace utils{
 
     namespace apt = alevin::protocols;
+    namespace bfs = boost::filesystem;
 
     constexpr uint32_t uint32_max = std::numeric_limits<uint32_t>::max();
 
@@ -68,6 +69,9 @@ namespace alevin{

     bool recoverBarcode(std::string& sequence);
 
+    void readWhitelist(bfs::path& filePath,
+                       TrueBcsT& trueBarcodes);
+
     template <typename ProtocolT>
     bool processAlevinOpts(AlevinOpts<ProtocolT>& aopt,
                            SalmonOpts& sopt,
@@ -84,10 +88,9 @@
     template <typename OrderedOptionsT>
     bool writeCmdInfo(boost::filesystem::path cmdInfoPath,
                       OrderedOptionsT& orderedOptions) {
-      namespace bfs = boost::filesystem;
       std::ofstream os(cmdInfoPath.string());
       cereal::JSONOutputArchive oa(os);
-      oa(cereal::make_nvp("Salmon_version:", std::string(salmon::version)));
+      oa(cereal::make_nvp("salmon_version:", std::string(salmon::version)));
       for (auto& opt : orderedOptions.options) {
         if (opt.value.size() == 1) {
           oa(cereal::make_nvp(opt.string_key, opt.value.front()));
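The readWhitelist declaration added above is defined elsewhere; the definition is not part of this diff. A hedged sketch of what a whitelist reader of this shape typically does, assuming TrueBcsT is a hash set of barcode strings and the input file holds one barcode per line (both assumptions, not confirmed by the diff):

```cpp
// Hypothetical sketch of a whitelist reader; not salmon's actual definition.
#include <boost/filesystem.hpp>
#include <fstream>
#include <string>
#include <unordered_set>

namespace bfs = boost::filesystem;
using TrueBcsT = std::unordered_set<std::string>; // assumed alias

void readWhitelist(bfs::path& filePath, TrueBcsT& trueBarcodes) {
  std::ifstream in(filePath.string());
  std::string barcode;
  while (std::getline(in, barcode)) { // one barcode per line
    if (!barcode.empty()) {
      trueBarcodes.insert(barcode);
    }
  }
}
```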
10 changes: 2 additions & 8 deletions include/BarcodeModel.hpp
@@ -16,9 +16,7 @@ namespace alevin{
     /*
      Calculates probability argmax_A {P(b | A,d)}
     */
-    template <typename ProtocolT>
-    bool calculateAlnProbability(AlevinOpts<ProtocolT>& aopt,
-                                 const std::string& s1,
+    bool calculateAlnProbability(const std::string& s1,
                                  const std::string& s2,
                                  double& probability){
       int32_t l1{static_cast<int32_t>(s1.size())}, l2 {static_cast<int32_t>(s2.size())};
@@ -157,11 +155,8 @@ namespace alevin{
       return idx;
     }
 
-    template <typename ProtocolT>
     void coinTossBarcodeModel(std::string barcode,
-                              AlevinOpts<ProtocolT>& aopt,
                               const std::vector<std::string>& trueBarcodes,
-                              const CFreqMapT& freqCounter,
                               MapT& dumpPair){
       if(trueBarcodes.size() == 1){
         dumpPair.push_back(std::make_pair(trueBarcodes.front(), 1.0));
@@ -172,8 +167,7 @@

       for(const std::string trueBarcode: trueBarcodes){
         //save the sequence of the true barcodes for dumping
-        bool isOneEdit = calculateAlnProbability(aopt,
-                                                 trueBarcode,
+        bool isOneEdit = calculateAlnProbability(trueBarcode,
                                                  barcode,
                                                  alnProbability);
         if(!isOneEdit){
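The de-templatized calculateAlnProbability keeps its contract: the boolean return reports whether the two barcodes are within one edit of each other, and the probability comes back through the out-parameter. Below is a standalone sketch of that one-edit test, independent of the alevin types and not salmon's actual implementation:

```cpp
// Returns true iff a and b differ by at most one substitution, insertion,
// or deletion. Illustrative only; salmon's version also computes a probability.
#include <cstddef>
#include <string>

bool withinOneEdit(const std::string& a, const std::string& b) {
  if (a.size() == b.size()) {
    // equal lengths: at most one substitution
    int mismatches = 0;
    for (std::size_t i = 0; i < a.size(); ++i) { mismatches += (a[i] != b[i]); }
    return mismatches <= 1;
  }
  const std::string& s = (a.size() < b.size()) ? a : b; // shorter
  const std::string& t = (a.size() < b.size()) ? b : a; // longer
  if (t.size() - s.size() > 1) { return false; }
  // lengths differ by one: at most one insertion/deletion
  std::size_t i = 0, j = 0;
  bool skipped = false;
  while (i < s.size() && j < t.size()) {
    if (s[i] == t[j]) { ++i; ++j; }
    else if (!skipped) { skipped = true; ++j; } // skip the extra char once
    else { return false; }
  }
  return true; // a trailing extra char in t is the single allowed edit
}
```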
5 changes: 3 additions & 2 deletions include/CollapsedCellOptimizer.hpp
@@ -27,6 +27,7 @@
 #include "concurrentqueue.h"
 
 #include <boost/math/special_functions/digamma.hpp>
+#include <boost/random.hpp>
 
 namespace bfs = boost::filesystem;
 using JqueueT = moodycamel::ConcurrentQueue<uint32_t>;
@@ -77,11 +78,11 @@ void optimizeCell(std::vector<std::string>& trueBarcodes,
                   std::vector<CellState>& skippedCB,
                   bool verbose, GZipWriter& gzw, bool noEM, bool useVBEM,
                   bool quiet, tbb::atomic<double>& totalDedupCounts,
-                  tbb::atomic<uint32_t>& totalExpGeneCounts,
+                  tbb::atomic<uint32_t>& totalExpGeneCounts, double priorWeight,
                   spp::sparse_hash_map<uint32_t, uint32_t>& txpToGeneMap,
                   uint32_t numGenes, uint32_t umiLength, uint32_t numBootstraps,
                   bool naiveEqclass, bool dumpUmiGraph, bool useAllBootstraps,
-                  bool initUniform, CFreqMapT& freqCounter,
+                  bool initUniform, CFreqMapT& freqCounter, bool dumpArboFragCounts,
                   spp::sparse_hash_set<uint32_t>& mRnaGenes,
                   spp::sparse_hash_set<uint32_t>& rRnaGenes,
                   std::atomic<uint64_t>& totalUniEdgesCounts,
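The new priorWeight parameter feeds the VBEM path (useVBEM), consistent with the digamma include above. As a hedged illustration of where such a weight enters a variational Bayes EM update, here is a generic E-step quantity under a symmetric Dirichlet prior; this is not salmon's actual update rule:

```cpp
// Generic VBEM ingredient: E_q[log theta_i] for a Dirichlet posterior whose
// parameters are (count_i + priorWeight). Illustrative, not salmon's code.
#include <boost/math/special_functions/digamma.hpp>
#include <cstddef>
#include <vector>

std::vector<double> expectedLogTheta(const std::vector<double>& counts,
                                     double priorWeight) {
  double total = 0.0;
  for (double c : counts) { total += c + priorWeight; }
  std::vector<double> elt(counts.size());
  const double dgTotal = boost::math::digamma(total);
  for (std::size_t i = 0; i < counts.size(); ++i) {
    // digamma(alpha_i) - digamma(sum_j alpha_j)
    elt[i] = boost::math::digamma(counts[i] + priorWeight) - dgTotal;
  }
  return elt;
}
```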
3 changes: 2 additions & 1 deletion include/DedupUMI.hpp
@@ -38,7 +38,8 @@ bool dedupClasses(std::vector<double>& geneAlphas,
                   std::vector<uint8_t>& tiers,
                   GZipWriter& gzw, uint32_t umiEditDistance,
                   bool dumpUmiGraph, std::string& trueBarcodeStr,
-                  spp::sparse_hash_map<uint16_t, uint32_t>& numMolHash,
+                  std::vector<spp::sparse_hash_map<uint16_t, uint16_t>>& arboEqClassCount,
+                  bool dumpArborescences,
                   std::atomic<uint64_t>& totalUniEdgesCounts,
                   std::atomic<uint64_t>& totalBiEdgesCounts);

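dedupClasses now reports per-class arborescence sizes through arboEqClassCount instead of the old flat numMolHash. One plausible reading of the new structure, sketched with std::unordered_map standing in for spp::sparse_hash_map (similar interface): each map is keyed by the number of fragments in a deduplicated UMI arborescence and valued by how many arborescences had that size. The exact keying is internal to alevin and assumed here.

```cpp
// Illustrative update helper for an arboEqClassCount-like structure; the
// recordArborescence name and the per-class indexing are hypothetical.
#include <cstddef>
#include <cstdint>
#include <unordered_map>
#include <vector>

using ArboCountMap = std::unordered_map<uint16_t, uint16_t>;

void recordArborescence(std::vector<ArboCountMap>& arboEqClassCount,
                        std::size_t classIdx, uint16_t numFragments) {
  if (classIdx >= arboEqClassCount.size()) {
    arboEqClassCount.resize(classIdx + 1);
  }
  ++arboEqClassCount[classIdx][numFragments]; // missing keys start at 0
}
```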
108 changes: 0 additions & 108 deletions include/Filter.hpp

This file was deleted.

